aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrian Harring <ferringb@gmail.com>2014-02-21 14:23:21 +0000
committerBrian Harring <ferringb@gmail.com>2014-02-21 14:23:21 +0000
commitf83644ffd5f86b82ec33f97e6e0e360157f59e7c (patch)
treea4aff586cae195e0f1e37da5224c43d52f63a1ad
parentUpdate the config to cvs2svn v2.4.0; no changes in what was defined, just man... (diff)
downloadgit-conversion-tools-f83644ffd5f86b82ec33f97e6e0e360157f59e7c.tar.gz
git-conversion-tools-f83644ffd5f86b82ec33f97e6e0e360157f59e7c.tar.bz2
git-conversion-tools-f83644ffd5f86b82ec33f97e6e0e360157f59e7c.zip
Drop the cvs2svn libs; no longer needed/used
-rw-r--r--cvs2svn_lib/__init__.py18
-rw-r--r--cvs2svn_lib/apple_single_filter.py292
-rw-r--r--cvs2svn_lib/artifact.py59
-rw-r--r--cvs2svn_lib/artifact_manager.py256
-rw-r--r--cvs2svn_lib/bzr_run_options.py175
-rw-r--r--cvs2svn_lib/changeset.py269
-rw-r--r--cvs2svn_lib/changeset_database.py70
-rw-r--r--cvs2svn_lib/changeset_graph.py456
-rw-r--r--cvs2svn_lib/changeset_graph_link.py149
-rw-r--r--cvs2svn_lib/changeset_graph_node.py50
-rw-r--r--cvs2svn_lib/check_dependencies_pass.py144
-rw-r--r--cvs2svn_lib/checkout_internal.py778
-rw-r--r--cvs2svn_lib/collect_data.py1431
-rw-r--r--cvs2svn_lib/common.py409
-rw-r--r--cvs2svn_lib/config.py221
-rw-r--r--cvs2svn_lib/context.py93
-rw-r--r--cvs2svn_lib/cvs_file.py287
-rw-r--r--cvs2svn_lib/cvs_file_database.py75
-rw-r--r--cvs2svn_lib/cvs_file_items.py1075
-rw-r--r--cvs2svn_lib/cvs_item.py901
-rw-r--r--cvs2svn_lib/cvs_item_database.py248
-rw-r--r--cvs2svn_lib/cvs_revision_manager.py85
-rw-r--r--cvs2svn_lib/database.py322
-rw-r--r--cvs2svn_lib/dumpfile_delegate.py510
-rw-r--r--cvs2svn_lib/fill_source.py192
-rw-r--r--cvs2svn_lib/fulltext_revision_recorder.py127
-rw-r--r--cvs2svn_lib/git_output_option.py658
-rw-r--r--cvs2svn_lib/git_revision_recorder.py114
-rw-r--r--cvs2svn_lib/git_run_options.py274
-rw-r--r--cvs2svn_lib/key_generator.py45
-rw-r--r--cvs2svn_lib/log.py174
-rw-r--r--cvs2svn_lib/main.py117
-rw-r--r--cvs2svn_lib/man_writer.py197
-rw-r--r--cvs2svn_lib/metadata.py26
-rw-r--r--cvs2svn_lib/metadata_database.py102
-rw-r--r--cvs2svn_lib/openings_closings.py236
-rw-r--r--cvs2svn_lib/output_option.py85
-rw-r--r--cvs2svn_lib/pass_manager.py215
-rw-r--r--cvs2svn_lib/passes.py1837
-rw-r--r--cvs2svn_lib/persistence_manager.py106
-rw-r--r--cvs2svn_lib/process.py116
-rw-r--r--cvs2svn_lib/project.py219
-rw-r--r--cvs2svn_lib/property_setters.py385
-rw-r--r--cvs2svn_lib/rcs_revision_manager.py51
-rw-r--r--cvs2svn_lib/rcs_stream.py149
-rw-r--r--cvs2svn_lib/record_table.py399
-rw-r--r--cvs2svn_lib/repository_delegate.py98
-rw-r--r--cvs2svn_lib/repository_mirror.py897
-rw-r--r--cvs2svn_lib/revision_manager.py189
-rw-r--r--cvs2svn_lib/run_options.py1035
-rw-r--r--cvs2svn_lib/serializer.py146
-rw-r--r--cvs2svn_lib/stats_keeper.py189
-rw-r--r--cvs2svn_lib/stdout_delegate.py107
-rw-r--r--cvs2svn_lib/svn_commit.py381
-rw-r--r--cvs2svn_lib/svn_commit_creator.py217
-rw-r--r--cvs2svn_lib/svn_commit_item.py50
-rw-r--r--cvs2svn_lib/svn_output_option.py753
-rw-r--r--cvs2svn_lib/svn_repository_delegate.py121
-rw-r--r--cvs2svn_lib/svn_revision_range.py171
-rw-r--r--cvs2svn_lib/svn_run_options.py543
-rw-r--r--cvs2svn_lib/symbol.py246
-rw-r--r--cvs2svn_lib/symbol_database.py68
-rw-r--r--cvs2svn_lib/symbol_statistics.py521
-rw-r--r--cvs2svn_lib/symbol_strategy.py685
-rw-r--r--cvs2svn_lib/symbol_transform.py236
-rw-r--r--cvs2svn_lib/time_range.py44
-rw-r--r--cvs2svn_lib/version.py27
-rw-r--r--cvs2svn_rcsparse/__init__.py26
-rw-r--r--cvs2svn_rcsparse/common.py324
-rw-r--r--cvs2svn_rcsparse/debug.py122
-rw-r--r--cvs2svn_rcsparse/default.py172
-rw-r--r--cvs2svn_rcsparse/parse_rcs_file.py73
-rw-r--r--cvs2svn_rcsparse/rcparse_redundant_work.patch99
-rw-r--r--cvs2svn_rcsparse/run-tests.py73
-rw-r--r--cvs2svn_rcsparse/texttools.py348
75 files changed, 0 insertions, 22158 deletions
diff --git a/cvs2svn_lib/__init__.py b/cvs2svn_lib/__init__.py
deleted file mode 100644
index 838d4c6..0000000
--- a/cvs2svn_lib/__init__.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This package contains modules that support cvs2svn."""
-
diff --git a/cvs2svn_lib/apple_single_filter.py b/cvs2svn_lib/apple_single_filter.py
deleted file mode 100644
index 95fa9cb..0000000
--- a/cvs2svn_lib/apple_single_filter.py
+++ /dev/null
@@ -1,292 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2007-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""A stream filter for extracting the data fork from AppleSingle data.
-
-Some Macintosh CVS clients store resource fork data along with the
-contents of the file (called the data fork) by encoding both in an
-'AppleSingle' data stream before storing them to CVS. This file
-contains a stream filter for extracting the data fork from such data
-streams. (Any other forks are discarded.)
-
-See the following for some random information about this format and
-how it is used by Macintosh CVS clients:
-
- http://users.phg-online.de/tk/netatalk/doc/Apple/v1/
- http://rfc.net/rfc1740.html
- http://ximbiot.com/cvs/cvshome/cyclic/cvs/dev-mac.html
- http://www.maccvs.org/faq.html#resfiles
- http://www.heilancoo.net/MacCVSClient/MacCVSClientDoc/storage-formats.html
-
-"""
-
-
-import struct
-from cStringIO import StringIO
-
-
-class AppleSingleFormatError(IOError):
- """The stream was not in correct AppleSingle format."""
-
- pass
-
-
-class AppleSingleIncorrectMagicError(AppleSingleFormatError):
- """The file didn't start with the correct magic number."""
-
- def __init__(self, data_read, eof):
- AppleSingleFormatError.__init__(self)
- self.data_read = data_read
- self.eof = eof
-
-
-class AppleSingleEOFError(AppleSingleFormatError):
- """EOF was reached where AppleSingle doesn't allow it."""
-
- pass
-
-
-class AppleSingleFilter(object):
- """A stream that reads the data fork from an AppleSingle stream.
-
- If the constructor discovers that the file is not a legitimate
- AppleSingle stream, then it raises an AppleSingleFormatError. In
- the special case that the magic number is incorrect, it raises
- AppleSingleIncorrectMagicError with data_read set to the data that
- have been read so far from the input stream. (This allows the
- caller the option to fallback to treating the input stream as a
- normal binary data stream.)"""
-
- # The header is:
- #
- # Magic number 4 bytes
- # Version number 4 bytes
- # File system or filler 16 bytes
- # Number of entries 2 bytes
- magic_struct = '>i'
- magic_len = struct.calcsize(magic_struct)
-
- # The part of the header after the magic number:
- rest_of_header_struct = '>i16sH'
- rest_of_header_len = struct.calcsize(rest_of_header_struct)
-
- # Each entry is:
- #
- # Entry ID 4 bytes
- # Offset 4 bytes
- # Length 4 bytes
- entry_struct = '>iii'
- entry_len = struct.calcsize(entry_struct)
-
- apple_single_magic = 0x00051600
- apple_single_version_1 = 0x00010000
- apple_single_version_2 = 0x00020000
- apple_single_filler = '\0' * 16
-
- apple_single_data_fork_entry_id = 1
-
- def __init__(self, stream):
- self.stream = stream
-
- # Check for the AppleSingle magic number:
- s = self._read_exactly(self.magic_len)
- if len(s) < self.magic_len:
- raise AppleSingleIncorrectMagicError(s, True)
-
- (magic,) = struct.unpack(self.magic_struct, s)
- if magic != self.apple_single_magic:
- raise AppleSingleIncorrectMagicError(s, False)
-
- # Read the rest of the header:
- s = self._read_exactly(self.rest_of_header_len)
- if len(s) < self.rest_of_header_len:
- raise AppleSingleEOFError('AppleSingle header incomplete')
-
- (version, filler, num_entries) = \
- struct.unpack(self.rest_of_header_struct, s)
-
- if version == self.apple_single_version_1:
- self._prepare_apple_single_v1_file(num_entries)
- elif version == self.apple_single_version_2:
- if filler != self.apple_single_filler:
- raise AppleSingleFormatError('Incorrect filler')
- self._prepare_apple_single_v2_file(num_entries)
- else:
- raise AppleSingleFormatError('Unknown AppleSingle version')
-
- def _read_exactly(self, size):
- """Read and return exactly SIZE characters from the stream.
-
- This method is to deal with the fact that stream.read(size) is
- allowed to return less than size characters. If EOF is reached
- before SIZE characters have been read, return the characters that
- have been read so far."""
-
- retval = []
- length_remaining = size
- while length_remaining > 0:
- s = self.stream.read(length_remaining)
- if not s:
- break
- retval.append(s)
- length_remaining -= len(s)
-
- return ''.join(retval)
-
- def _prepare_apple_single_file(self, num_entries):
- entries = self._read_exactly(num_entries * self.entry_len)
- if len(entries) < num_entries * self.entry_len:
- raise AppleSingleEOFError('Incomplete entries list')
-
- for i in range(num_entries):
- entry = entries[i * self.entry_len : (i + 1) * self.entry_len]
- (entry_id, offset, length) = struct.unpack(self.entry_struct, entry)
- if entry_id == self.apple_single_data_fork_entry_id:
- break
- else:
- raise AppleSingleFormatError('No data fork found')
-
- # The data fork is located at [offset : offset + length]. Read up
- # to the start of the data:
- n = offset - self.magic_len - self.rest_of_header_len - len(entries)
- if n < 0:
- raise AppleSingleFormatError('Invalid offset to AppleSingle data fork')
-
- max_chunk_size = 65536
- while n > 0:
- s = self.stream.read(min(n, max_chunk_size))
- if not s:
- raise AppleSingleEOFError(
- 'Offset to AppleSingle data fork past end of file'
- )
- n -= len(s)
-
- self.length_remaining = length
-
- def _prepare_apple_single_v1_file(self, num_entries):
- self._prepare_apple_single_file(num_entries)
-
- def _prepare_apple_single_v2_file(self, num_entries):
- self._prepare_apple_single_file(num_entries)
-
- def read(self, size=-1):
- if size == 0 or self.length_remaining == 0:
- return ''
- elif size < 0:
- s = self._read_exactly(self.length_remaining)
- if len(s) < self.length_remaining:
- raise AppleSingleEOFError('AppleSingle data fork truncated')
- self.length_remaining = 0
- return s
- else:
- # The length of this read is allowed to be shorter than the
- # requested size:
- s = self.stream.read(min(size, self.length_remaining))
- if not s:
- raise AppleSingleEOFError()
- self.length_remaining -= len(s)
- return s
-
- def close(self):
- self.stream.close()
- self.stream = None
-
-
-class CompoundStream(object):
- """A stream that reads from a series of streams, one after the other."""
-
- def __init__(self, *streams):
- self.streams = list(streams)
- self.stream_index = 0
-
- def read(self, size=-1):
- if size < 0:
- retval = []
- while self.stream_index < len(self.streams):
- retval.append(self.streams[self.stream_index].read())
- self.stream_index += 1
- return ''.join(retval)
- else:
- while self.stream_index < len(self.streams):
- s = self.streams[self.stream_index].read(size)
- if s:
- # This may not be the full size requested, but that is OK:
- return s
- else:
- # That stream was empty; proceed to the next stream:
- self.stream_index += 1
-
- # No streams are left:
- return ''
-
- def close(self):
- for stream in self.streams:
- stream.close()
- self.streams = None
-
-
-def get_maybe_apple_single_stream(stream):
- """Treat STREAM as AppleSingle if possible; otherwise treat it literally.
-
- If STREAM is in AppleSingle format, then return a stream that will
- output the data fork of the original stream. Otherwise, return a
- stream that will output the original file contents literally.
-
- Be careful not to read from STREAM after it has already hit EOF."""
-
- try:
- return AppleSingleFilter(stream)
- except AppleSingleIncorrectMagicError, e:
- # This is OK; the file is not AppleSingle, so we read it normally:
- string_io = StringIO(e.data_read)
- if e.eof:
- # The original stream already reached EOF, so the part already
- # read contains the complete file contents:
- return string_io
- else:
- # The stream needs to output the part already read followed by
- # whatever hasn't been read of the original stream:
- return CompoundStream(string_io, stream)
-
-
-if __name__ == '__main__':
- # For fun and testing, allow use of this file as a pipe if it is
- # invoked as a script. Specifically, if stdin is in AppleSingle
- # format, then output only its data fork; otherwise, output it
- # unchanged.
- #
- # This might not work on systems where sys.stdin is opened in text
- # mode.
- #
- # Remember to set PYTHONPATH to point to the main cvs2svn directory.
-
- import sys
-
- #CHUNK_SIZE = -1
- CHUNK_SIZE = 100
-
- f = get_maybe_apple_single_stream(sys.stdin)
-
- if CHUNK_SIZE < 0:
- sys.stdout.write(f.read())
- else:
- while True:
- s = f.read(CHUNK_SIZE)
- if not s:
- break
- sys.stdout.write(s)
-
-
diff --git a/cvs2svn_lib/artifact.py b/cvs2svn_lib/artifact.py
deleted file mode 100644
index 99d6945..0000000
--- a/cvs2svn_lib/artifact.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module defines Artifact types to be used with an ArtifactManager."""
-
-
-import os
-
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.log import Log
-
-
-class Artifact(object):
- """An object that is created, used across passes, then cleaned up."""
-
- def __init__(self):
- # The set of passes that need this artifact. This field is
- # maintained by ArtifactManager.
- self._passes_needed = set()
-
- def cleanup(self):
- """This artifact is no longer needed; clean it up."""
-
- pass
-
-
-class TempFile(Artifact):
- """A temporary file that can be used across cvs2svn passes."""
-
- def __init__(self, basename):
- Artifact.__init__(self)
- self.basename = basename
-
- def _get_filename(self):
- return Ctx().get_temp_filename(self.basename)
-
- filename = property(_get_filename)
-
- def cleanup(self):
- Log().verbose("Deleting", self.filename)
- os.unlink(self.filename)
-
- def __str__(self):
- return 'Temporary file %r' % (self.filename,)
-
-
diff --git a/cvs2svn_lib/artifact_manager.py b/cvs2svn_lib/artifact_manager.py
deleted file mode 100644
index 08f0ec7..0000000
--- a/cvs2svn_lib/artifact_manager.py
+++ /dev/null
@@ -1,256 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module manages the artifacts produced by conversion passes."""
-
-
-from cvs2svn_lib.log import Log
-from cvs2svn_lib.artifact import TempFile
-
-
-class ArtifactNotActiveError(Exception):
- """An artifact was requested when no passes that have registered
- that they need it are active."""
-
- def __init__(self, artifact_name):
- Exception.__init__(
- self, 'Artifact %s is not currently active' % artifact_name)
-
-
-class ArtifactManager:
- """Manage artifacts that are created by one pass but needed by others.
-
- This class is responsible for cleaning up artifacts once they are no
- longer needed. The trick is that cvs2svn can be run pass by pass,
- so not all passes might be executed during a specific program run.
-
- To use this class:
-
- - Call artifact_manager.set_artifact(name, artifact) once for each
- known artifact.
-
- - Call artifact_manager.creates(which_pass, artifact) to indicate
- that WHICH_PASS is the pass that creates ARTIFACT.
-
- - Call artifact_manager.uses(which_pass, artifact) to indicate that
- WHICH_PASS needs to use ARTIFACT.
-
- There are also helper methods register_temp_file(),
- register_artifact_needed(), and register_temp_file_needed() which
- combine some useful operations.
-
- Then, in pass order:
-
- - Call pass_skipped() for any passes that were already executed
- during a previous cvs2svn run.
-
- - Call pass_started() when a pass is about to start execution.
-
- - If a pass that has been started will be continued during the next
- program run, then call pass_continued().
-
- - If a pass that has been started finishes execution, call
- pass_done(), to allow any artifacts that won't be needed anymore
- to be cleaned up.
-
- - Call pass_deferred() for any passes that have been deferred to a
- future cvs2svn run.
-
- Finally:
-
- - Call check_clean() to verify that all artifacts have been
- accounted for."""
-
- def __init__(self):
- # A map { artifact_name : artifact } of known artifacts.
- self._artifacts = { }
-
- # A map { pass : set_of_artifacts }, where set_of_artifacts is a
- # set of artifacts needed by the pass.
- self._pass_needs = { }
-
- # A set of passes that are currently being executed.
- self._active_passes = set()
-
- def set_artifact(self, name, artifact):
- """Add ARTIFACT to the list of artifacts that we manage.
-
- Store it under NAME."""
-
- assert name not in self._artifacts
- self._artifacts[name] = artifact
-
- def get_artifact(self, name):
- """Return the artifact with the specified name.
-
- If the artifact does not currently exist, raise a KeyError. If it
- is not registered as being needed by one of the active passes,
- raise an ArtifactNotActiveError."""
-
- artifact = self._artifacts[name]
- for active_pass in self._active_passes:
- if artifact in self._pass_needs[active_pass]:
- # OK
- return artifact
- else:
- raise ArtifactNotActiveError(name)
-
- def creates(self, which_pass, artifact):
- """Register that WHICH_PASS creates ARTIFACT.
-
- ARTIFACT must already have been registered."""
-
- # An artifact is automatically "needed" in the pass in which it is
- # created:
- self.uses(which_pass, artifact)
-
- def uses(self, which_pass, artifact):
- """Register that WHICH_PASS uses ARTIFACT.
-
- ARTIFACT must already have been registered."""
-
- artifact._passes_needed.add(which_pass)
- if which_pass in self._pass_needs:
- self._pass_needs[which_pass].add(artifact)
- else:
- self._pass_needs[which_pass] = set([artifact])
-
- def register_temp_file(self, basename, which_pass):
- """Register a temporary file with base name BASENAME as an artifact.
-
- Return the filename of the temporary file."""
-
- artifact = TempFile(basename)
- self.set_artifact(basename, artifact)
- self.creates(which_pass, artifact)
-
- def get_temp_file(self, basename):
- """Return the filename of the temporary file with the specified BASENAME.
-
- If the temporary file is not an existing, registered TempFile,
- raise a KeyError."""
-
- return self.get_artifact(basename).filename
-
- def register_artifact_needed(self, artifact_name, which_pass):
- """Register that WHICH_PASS uses the artifact named ARTIFACT_NAME.
-
- An artifact with this name must already have been registered."""
-
- artifact = self._artifacts[artifact_name]
- artifact._passes_needed.add(which_pass)
- if which_pass in self._pass_needs:
- self._pass_needs[which_pass].add(artifact)
- else:
- self._pass_needs[which_pass] = set([artifact,])
-
- def register_temp_file_needed(self, basename, which_pass):
- """Register that a temporary file is needed by WHICH_PASS.
-
- Register that the temporary file with base name BASENAME is needed
- by WHICH_PASS."""
-
- self.register_artifact_needed(basename, which_pass)
-
- def _unregister_artifacts(self, which_pass):
- """Unregister any artifacts that were needed for WHICH_PASS.
-
- Return a list of artifacts that are no longer needed at all."""
-
- try:
- artifacts = list(self._pass_needs[which_pass])
- except KeyError:
- # No artifacts were needed for that pass:
- return []
-
- del self._pass_needs[which_pass]
-
- unneeded_artifacts = []
- for artifact in artifacts:
- artifact._passes_needed.remove(which_pass)
- if not artifact._passes_needed:
- unneeded_artifacts.append(artifact)
-
- return unneeded_artifacts
-
- def pass_skipped(self, which_pass):
- """WHICH_PASS was executed during a previous cvs2svn run.
-
- Its artifacts were created then, and any artifacts that would
- normally be cleaned up after this pass have already been cleaned
- up."""
-
- self._unregister_artifacts(which_pass)
-
- def pass_started(self, which_pass):
- """WHICH_PASS is starting."""
-
- self._active_passes.add(which_pass)
-
- def pass_continued(self, which_pass):
- """WHICH_PASS will be continued during the next program run.
-
- WHICH_PASS, which has already been started, will be continued
- during the next program run. Unregister any artifacts that would
- be cleaned up at the end of WHICH_PASS without actually cleaning
- them up."""
-
- self._active_passes.remove(which_pass)
- self._unregister_artifacts(which_pass)
-
- def pass_done(self, which_pass, skip_cleanup):
- """WHICH_PASS is done.
-
- Clean up all artifacts that are no longer needed. If SKIP_CLEANUP
- is True, then just do the bookkeeping without actually calling
- artifact.cleanup()."""
-
- self._active_passes.remove(which_pass)
- artifacts = self._unregister_artifacts(which_pass)
- if not skip_cleanup:
- for artifact in artifacts:
- artifact.cleanup()
-
- def pass_deferred(self, which_pass):
- """WHICH_PASS is being deferred until a future cvs2svn run.
-
- Unregister any artifacts that would be cleaned up during
- WHICH_PASS."""
-
- self._unregister_artifacts(which_pass)
-
- def check_clean(self):
- """All passes have been processed.
-
- Output a warning messages if all artifacts have not been accounted
- for. (This is mainly a consistency check, that no artifacts were
- registered under nonexistent passes.)"""
-
- unclean_artifacts = [
- str(artifact)
- for artifact in self._artifacts.values()
- if artifact._passes_needed]
-
- if unclean_artifacts:
- Log().warn(
- 'INTERNAL: The following artifacts were not cleaned up:\n %s\n'
- % ('\n '.join(unclean_artifacts)))
-
-
-# The default ArtifactManager instance:
-artifact_manager = ArtifactManager()
-
-
diff --git a/cvs2svn_lib/bzr_run_options.py b/cvs2svn_lib/bzr_run_options.py
deleted file mode 100644
index 5332dff..0000000
--- a/cvs2svn_lib/bzr_run_options.py
+++ /dev/null
@@ -1,175 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module manages cvs2bzr run options."""
-
-
-import sys
-import datetime
-import codecs
-
-from cvs2svn_lib.version import VERSION
-from cvs2svn_lib.common import FatalError
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.run_options import not_both
-from cvs2svn_lib.run_options import RunOptions
-from cvs2svn_lib.run_options import ContextOption
-from cvs2svn_lib.run_options import IncompatibleOption
-from cvs2svn_lib.run_options import authors
-from cvs2svn_lib.man_writer import ManWriter
-from cvs2svn_lib.rcs_revision_manager import RCSRevisionReader
-from cvs2svn_lib.cvs_revision_manager import CVSRevisionReader
-from cvs2svn_lib.git_run_options import GitRunOptions
-from cvs2svn_lib.git_output_option import GitRevisionInlineWriter
-from cvs2svn_lib.git_output_option import GitOutputOption
-from cvs2svn_lib.revision_manager import NullRevisionRecorder
-from cvs2svn_lib.revision_manager import NullRevisionExcluder
-
-
-short_desc = 'convert a cvs repository into a Bazaar repository'
-
-synopsis = """\
-.B cvs2bzr
-[\\fIOPTION\\fR]... \\fIOUTPUT-OPTIONS CVS-REPOS-PATH\\fR
-.br
-.B cvs2bzr
-[\\fIOPTION\\fR]... \\fI--options=PATH\\fR
-"""
-
-description="""\
-Convert a CVS repository into a Bazaar repository, including history.
-
-"""
-long_desc = """\
-Create a new Bazaar repository based on the version history stored in a
-CVS repository. Each CVS commit will be mirrored in the Bazaar
-repository, including such information as date of commit and id of the
-committer.
-.P
-The output of this program is a "fast-import dumpfile", which
-can be loaded into a Bazaar repository using the Bazaar FastImport
-Plugin, available from https://launchpad.net/bzr-fastimport.
-
-.P
-\\fICVS-REPOS-PATH\\fR is the filesystem path of the part of the CVS
-repository that you want to convert. This path doesn't have to be the
-top level directory of a CVS repository; it can point at a project
-within a repository, in which case only that project will be
-converted. This path or one of its parent directories has to contain
-a subdirectory called CVSROOT (though the CVSROOT directory can be
-empty).
-.P
-It is not possible directly to convert a CVS repository to which you
-only have remote access, but the FAQ describes tools that may be used
-to create a local copy of a remote CVS repository.
-"""
-
-files = """\
-A directory called \\fIcvs2svn-tmp\\fR (or the directory specified by
-\\fB--tmpdir\\fR) is used as scratch space for temporary data files.
-"""
-
-see_also = [
- ('cvs', '1'),
- ('bzr', '1'),
- ]
-
-
-class BzrRunOptions(GitRunOptions):
-
- def get_description(self):
- return description
-
- def _get_output_options_group(self):
- group = RunOptions._get_output_options_group(self)
-
- group.add_option(IncompatibleOption(
- '--dumpfile', type='string',
- action='store',
- help='path to which the data should be written',
- man_help=(
- 'Write the blobs and revision data to \\fIpath\\fR.'
- ),
- metavar='PATH',
- ))
- group.add_option(ContextOption(
- '--dry-run',
- action='store_true',
- help=(
- 'do not create any output; just print what would happen.'
- ),
- man_help=(
- 'Do not create any output; just print what would happen.'
- ),
- ))
-
- return group
-
- def callback_manpage(self, option, opt_str, value, parser):
- f = codecs.getwriter('utf_8')(sys.stdout)
- ManWriter(
- parser,
- section='1',
- date=datetime.date.today(),
- source='Version %s' % (VERSION,),
- manual='User Commands',
- short_desc=short_desc,
- synopsis=synopsis,
- long_desc=long_desc,
- files=files,
- authors=authors,
- see_also=see_also,
- ).write_manpage(f)
- sys.exit(0)
-
- def process_io_options(self):
- """Process input/output options.
-
- Process options related to extracting data from the CVS repository
- and writing to a Bazaar-friendly fast-import file."""
-
- ctx = Ctx()
- options = self.options
-
- not_both(options.use_rcs, '--use-rcs',
- options.use_cvs, '--use-cvs')
-
- if options.use_rcs:
- revision_reader = RCSRevisionReader(
- co_executable=options.co_executable
- )
- else:
- # --use-cvs is the default:
- revision_reader = CVSRevisionReader(
- cvs_executable=options.cvs_executable
- )
-
- if not ctx.dry_run and not options.dumpfile:
- raise FatalError("must pass '--dry-run' or '--dumpfile' option.")
-
- ctx.revision_recorder = NullRevisionRecorder()
- ctx.revision_excluder = NullRevisionExcluder()
- ctx.revision_reader = None
-
- ctx.output_option = GitOutputOption(
- options.dumpfile,
- GitRevisionInlineWriter(revision_reader),
- max_merges=None,
- # Optional map from CVS author names to bzr author names:
- author_transforms={}, # FIXME
- )
-
-
diff --git a/cvs2svn_lib/changeset.py b/cvs2svn_lib/changeset.py
deleted file mode 100644
index 1022e0a..0000000
--- a/cvs2svn_lib/changeset.py
+++ /dev/null
@@ -1,269 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2006-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""Manage change sets."""
-
-
-from cvs2svn_lib.common import InternalError
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.symbol import Branch
-from cvs2svn_lib.symbol import Tag
-from cvs2svn_lib.time_range import TimeRange
-from cvs2svn_lib.changeset_graph_node import ChangesetGraphNode
-
-
-class Changeset(object):
- """A set of cvs_items that might potentially form a single change set."""
-
- def __init__(self, id, cvs_item_ids):
- self.id = id
- self.cvs_item_ids = list(cvs_item_ids)
-
- def iter_cvs_items(self):
- """Yield the CVSItems within this Changeset."""
-
- for (id, cvs_item) in Ctx()._cvs_items_db.get_many(self.cvs_item_ids):
- assert cvs_item is not None
- yield cvs_item
-
- def get_projects_opened(self):
- """Return the set of projects that might be opened by this changeset."""
-
- raise NotImplementedError()
-
- def create_graph_node(self, cvs_item_to_changeset_id):
- """Return a ChangesetGraphNode for this Changeset."""
-
- raise NotImplementedError()
-
- def create_split_changeset(self, id, cvs_item_ids):
- """Return a Changeset with the specified contents.
-
- This method is only implemented for changesets that can be split.
- The type of the new changeset should be the same as that of SELF,
- and any other information from SELF should also be copied to the
- new changeset."""
-
- raise NotImplementedError()
-
- def __getstate__(self):
- return (self.id, self.cvs_item_ids,)
-
- def __setstate__(self, state):
- (self.id, self.cvs_item_ids,) = state
-
- def __cmp__(self, other):
- raise NotImplementedError()
-
- def __str__(self):
- raise NotImplementedError()
-
- def __repr__(self):
- return '%s [%s]' % (
- self, ', '.join(['%x' % id for id in self.cvs_item_ids]),)
-
-
-class RevisionChangeset(Changeset):
- """A Changeset consisting of CVSRevisions."""
-
- _sort_order = 3
-
- def create_graph_node(self, cvs_item_to_changeset_id):
- time_range = TimeRange()
- pred_ids = set()
- succ_ids = set()
-
- for cvs_item in self.iter_cvs_items():
- time_range.add(cvs_item.timestamp)
-
- for pred_id in cvs_item.get_pred_ids():
- changeset_id = cvs_item_to_changeset_id.get(pred_id)
- if changeset_id is not None:
- pred_ids.add(changeset_id)
-
- for succ_id in cvs_item.get_succ_ids():
- changeset_id = cvs_item_to_changeset_id.get(succ_id)
- if changeset_id is not None:
- succ_ids.add(changeset_id)
-
- return ChangesetGraphNode(self, time_range, pred_ids, succ_ids)
-
- def create_split_changeset(self, id, cvs_item_ids):
- return RevisionChangeset(id, cvs_item_ids)
-
- def __cmp__(self, other):
- return cmp(self._sort_order, other._sort_order) \
- or cmp(self.id, other.id)
-
- def __str__(self):
- return 'RevisionChangeset<%x>' % (self.id,)
-
-
-class OrderedChangeset(Changeset):
- """A Changeset of CVSRevisions whose preliminary order is known.
-
- The first changeset ordering involves only RevisionChangesets, and
- results in a full ordering of RevisionChangesets (i.e., a linear
- chain of dependencies with the order consistent with the
- dependencies). These OrderedChangesets form the skeleton for the
- full topological sort that includes SymbolChangesets as well."""
-
- _sort_order = 2
-
- def __init__(self, id, cvs_item_ids, ordinal, prev_id, next_id):
- Changeset.__init__(self, id, cvs_item_ids)
-
- # The order of this changeset among all OrderedChangesets:
- self.ordinal = ordinal
-
- # The changeset id of the previous OrderedChangeset, or None if
- # this is the first OrderedChangeset:
- self.prev_id = prev_id
-
- # The changeset id of the next OrderedChangeset, or None if this
- # is the last OrderedChangeset:
- self.next_id = next_id
-
- def get_projects_opened(self):
- retval = set()
- for cvs_item in self.iter_cvs_items():
- retval.add(cvs_item.cvs_file.project)
- return retval
-
- def create_graph_node(self, cvs_item_to_changeset_id):
- time_range = TimeRange()
-
- pred_ids = set()
- succ_ids = set()
-
- if self.prev_id is not None:
- pred_ids.add(self.prev_id)
-
- if self.next_id is not None:
- succ_ids.add(self.next_id)
-
- for cvs_item in self.iter_cvs_items():
- time_range.add(cvs_item.timestamp)
-
- for pred_id in cvs_item.get_symbol_pred_ids():
- changeset_id = cvs_item_to_changeset_id.get(pred_id)
- if changeset_id is not None:
- pred_ids.add(changeset_id)
-
- for succ_id in cvs_item.get_symbol_succ_ids():
- changeset_id = cvs_item_to_changeset_id.get(succ_id)
- if changeset_id is not None:
- succ_ids.add(changeset_id)
-
- return ChangesetGraphNode(self, time_range, pred_ids, succ_ids)
-
- def __getstate__(self):
- return (
- Changeset.__getstate__(self),
- self.ordinal, self.prev_id, self.next_id,)
-
- def __setstate__(self, state):
- (changeset_state, self.ordinal, self.prev_id, self.next_id,) = state
- Changeset.__setstate__(self, changeset_state)
-
- def __cmp__(self, other):
- return cmp(self._sort_order, other._sort_order) \
- or cmp(self.id, other.id)
-
- def __str__(self):
- return 'OrderedChangeset<%x(%d)>' % (self.id, self.ordinal,)
-
-
-class SymbolChangeset(Changeset):
- """A Changeset consisting of CVSSymbols."""
-
- def __init__(self, id, symbol, cvs_item_ids):
- Changeset.__init__(self, id, cvs_item_ids)
- self.symbol = symbol
-
- def get_projects_opened(self):
- # A SymbolChangeset can never open a project.
- return set()
-
- def create_graph_node(self, cvs_item_to_changeset_id):
- pred_ids = set()
- succ_ids = set()
-
- for cvs_item in self.iter_cvs_items():
- for pred_id in cvs_item.get_pred_ids():
- changeset_id = cvs_item_to_changeset_id.get(pred_id)
- if changeset_id is not None:
- pred_ids.add(changeset_id)
-
- for succ_id in cvs_item.get_succ_ids():
- changeset_id = cvs_item_to_changeset_id.get(succ_id)
- if changeset_id is not None:
- succ_ids.add(changeset_id)
-
- return ChangesetGraphNode(self, TimeRange(), pred_ids, succ_ids)
-
- def __cmp__(self, other):
- return cmp(self._sort_order, other._sort_order) \
- or cmp(self.symbol, other.symbol) \
- or cmp(self.id, other.id)
-
- def __getstate__(self):
- return (Changeset.__getstate__(self), self.symbol.id,)
-
- def __setstate__(self, state):
- (changeset_state, symbol_id) = state
- Changeset.__setstate__(self, changeset_state)
- self.symbol = Ctx()._symbol_db.get_symbol(symbol_id)
-
-
-class BranchChangeset(SymbolChangeset):
- """A Changeset consisting of CVSBranches."""
-
- _sort_order = 1
-
- def create_split_changeset(self, id, cvs_item_ids):
- return BranchChangeset(id, self.symbol, cvs_item_ids)
-
- def __str__(self):
- return 'BranchChangeset<%x>("%s")' % (self.id, self.symbol,)
-
-
-class TagChangeset(SymbolChangeset):
- """A Changeset consisting of CVSTags."""
-
- _sort_order = 0
-
- def create_split_changeset(self, id, cvs_item_ids):
- return TagChangeset(id, self.symbol, cvs_item_ids)
-
- def __str__(self):
- return 'TagChangeset<%x>("%s")' % (self.id, self.symbol,)
-
-
-def create_symbol_changeset(id, symbol, cvs_item_ids):
- """Factory function for SymbolChangesets.
-
- Return a BranchChangeset or TagChangeset, depending on the type of
- SYMBOL. SYMBOL must be a Branch or Tag."""
-
- if isinstance(symbol, Branch):
- return BranchChangeset(id, symbol, cvs_item_ids)
- if isinstance(symbol, Tag):
- return TagChangeset(id, symbol, cvs_item_ids)
- else:
- raise InternalError('Unknown symbol type %s' % (symbol,))
-
-
diff --git a/cvs2svn_lib/changeset_database.py b/cvs2svn_lib/changeset_database.py
deleted file mode 100644
index 82ca904..0000000
--- a/cvs2svn_lib/changeset_database.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2006-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains classes to store changesets."""
-
-
-from cvs2svn_lib.changeset import Changeset
-from cvs2svn_lib.changeset import RevisionChangeset
-from cvs2svn_lib.changeset import OrderedChangeset
-from cvs2svn_lib.changeset import SymbolChangeset
-from cvs2svn_lib.changeset import BranchChangeset
-from cvs2svn_lib.changeset import TagChangeset
-from cvs2svn_lib.record_table import UnsignedIntegerPacker
-from cvs2svn_lib.record_table import MmapRecordTable
-from cvs2svn_lib.record_table import RecordTable
-from cvs2svn_lib.database import IndexedStore
-from cvs2svn_lib.serializer import PrimedPickleSerializer
-
-
-# Should the CVSItemToChangesetTable database files be memory mapped?
-# This speeds up the converstion but can cause the computer's virtual
-# address space to be exhausted. This option can be changed
-# externally, affecting any CVSItemToChangesetTables opened subsequent
-# to the change:
-use_mmap_for_cvs_item_to_changeset_table = False
-
-
-def CVSItemToChangesetTable(filename, mode):
- if use_mmap_for_cvs_item_to_changeset_table:
- return MmapRecordTable(filename, mode, UnsignedIntegerPacker())
- else:
- return RecordTable(filename, mode, UnsignedIntegerPacker())
-
-
-class ChangesetDatabase(IndexedStore):
- def __init__(self, filename, index_filename, mode):
- primer = (
- Changeset,
- RevisionChangeset,
- OrderedChangeset,
- SymbolChangeset,
- BranchChangeset,
- TagChangeset,
- )
- IndexedStore.__init__(
- self, filename, index_filename, mode, PrimedPickleSerializer(primer))
-
- def store(self, changeset):
- self.add(changeset)
-
- def keys(self):
- return list(self.iterkeys())
-
- def close(self):
- IndexedStore.close(self)
-
-
diff --git a/cvs2svn_lib/changeset_graph.py b/cvs2svn_lib/changeset_graph.py
deleted file mode 100644
index 64ebf2c..0000000
--- a/cvs2svn_lib/changeset_graph.py
+++ /dev/null
@@ -1,456 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2006-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""The changeset dependency graph."""
-
-
-from cvs2svn_lib.log import Log
-from cvs2svn_lib.changeset import RevisionChangeset
-from cvs2svn_lib.changeset import OrderedChangeset
-from cvs2svn_lib.changeset import BranchChangeset
-from cvs2svn_lib.changeset import TagChangeset
-
-
-class CycleInGraphException(Exception):
- def __init__(self, cycle):
- Exception.__init__(
- self,
- 'Cycle found in graph: %s'
- % ' -> '.join(map(str, cycle + [cycle[0]])))
-
-
-class NoPredNodeInGraphException(Exception):
- def __init__(self, node):
- Exception.__init__(self, 'Node %s has no predecessors' % (node,))
-
-
-class _NoPredNodes:
- """Manage changesets that are to be processed.
-
- Output the changesets in order by time and changeset type.
-
- The implementation of this class is crude: as changesets are added,
- they are appended to a list. When one is needed, the list is sorted
- in reverse order and then the last changeset in the list is
- returned. To reduce the number of sorts that are needed, the class
- keeps track of whether the list is currently sorted.
-
- All this repeated sorting is wasteful and unnecessary. We should
- instead use a heap to output the changeset order, which would
- require O(lg N) work per add()/get() rather than O(1) and O(N lg N)
- as in the current implementation [1]. But: (1) the lame interface
- of heapq doesn't allow an arbitrary compare function, so we would
- have to store extra information in the array elements; (2) in
- practice, the number of items in the list at any time is only a tiny
- fraction of the total number of changesets; and (3) testing showed
- that the heapq implementation is no faster than this one (perhaps
- because of the increased memory usage).
-
- [1] According to Objects/listsort.txt in the Python source code, the
- Python list-sorting code is heavily optimized for arrays that have
- runs of already-sorted elements, so the current cost of get() is
- probably closer to O(N) than O(N lg N)."""
-
- def __init__(self, changeset_db):
- self.changeset_db = changeset_db
- # A list [(node, changeset,)] of nodes with no predecessors:
- self._nodes = []
- self._sorted = True
-
- def __len__(self):
- return len(self._nodes)
-
- @staticmethod
- def _compare((node_1, changeset_1), (node_2, changeset_2)):
- """Define a (reverse) ordering on self._nodes."""
-
- return cmp(node_2.time_range, node_1.time_range) \
- or cmp(changeset_2, changeset_1)
-
- def add(self, node):
- self._nodes.append( (node, self.changeset_db[node.id],) )
- self._sorted = False
-
- def get(self):
- """Return (node, changeset,) of the smallest node.
-
- 'Smallest' is defined by self._compare()."""
-
- if not self._sorted:
- self._nodes.sort(self._compare)
- self._sorted = True
- return self._nodes.pop()
-
-
-class ChangesetGraph(object):
- """A graph of changesets and their dependencies."""
-
- def __init__(self, changeset_db, cvs_item_to_changeset_id):
- self._changeset_db = changeset_db
- self._cvs_item_to_changeset_id = cvs_item_to_changeset_id
- # A map { id : ChangesetGraphNode }
- self.nodes = {}
-
- def close(self):
- self._cvs_item_to_changeset_id.close()
- self._cvs_item_to_changeset_id = None
- self._changeset_db.close()
- self._changeset_db = None
-
- def add_changeset(self, changeset):
- """Add CHANGESET to this graph.
-
- Determine and record any dependencies to changesets that are
- already in the graph. This method does not affect the databases."""
-
- node = changeset.create_graph_node(self._cvs_item_to_changeset_id)
-
- # Now tie the node into our graph. If a changeset referenced by
- # node is already in our graph, then add the backwards connection
- # from the other node to the new one. If not, then delete the
- # changeset from node.
-
- for pred_id in list(node.pred_ids):
- pred_node = self.nodes.get(pred_id)
- if pred_node is not None:
- pred_node.succ_ids.add(node.id)
- else:
- node.pred_ids.remove(pred_id)
-
- for succ_id in list(node.succ_ids):
- succ_node = self.nodes.get(succ_id)
- if succ_node is not None:
- succ_node.pred_ids.add(node.id)
- else:
- node.succ_ids.remove(succ_id)
-
- self.nodes[node.id] = node
-
- def store_changeset(self, changeset):
- for cvs_item_id in changeset.cvs_item_ids:
- self._cvs_item_to_changeset_id[cvs_item_id] = changeset.id
- self._changeset_db.store(changeset)
-
- def add_new_changeset(self, changeset):
- """Add the new CHANGESET to the graph and also to the databases."""
-
- if Log().is_on(Log.DEBUG):
- Log().debug('Adding changeset %r' % (changeset,))
-
- self.add_changeset(changeset)
- self.store_changeset(changeset)
-
- def delete_changeset(self, changeset):
- """Remove CHANGESET from the graph and also from the databases.
-
- In fact, we don't remove CHANGESET from
- self._cvs_item_to_changeset_id, because in practice the CVSItems
- in CHANGESET are always added again as part of a new CHANGESET,
- which will cause the old values to be overwritten."""
-
- if Log().is_on(Log.DEBUG):
- Log().debug('Removing changeset %r' % (changeset,))
-
- del self[changeset.id]
- del self._changeset_db[changeset.id]
-
- def __nonzero__(self):
- """Instances are considered True iff they contain any nodes."""
-
- return bool(self.nodes)
-
- def __contains__(self, id):
- """Return True if the specified ID is contained in this graph."""
-
- return id in self.nodes
-
- def __getitem__(self, id):
- return self.nodes[id]
-
- def get(self, id):
- return self.nodes.get(id)
-
- def __delitem__(self, id):
- """Remove the node corresponding to ID.
-
- Also remove references to it from other nodes. This method does
- not change pred_ids or succ_ids of the node being deleted, nor
- does it affect the databases."""
-
- node = self[id]
-
- for succ_id in node.succ_ids:
- succ = self[succ_id]
- succ.pred_ids.remove(node.id)
-
- for pred_id in node.pred_ids:
- pred = self[pred_id]
- pred.succ_ids.remove(node.id)
-
- del self.nodes[node.id]
-
- def keys(self):
- return self.nodes.keys()
-
- def __iter__(self):
- return self.nodes.itervalues()
-
- def _get_path(self, reachable_changesets, starting_node_id, ending_node_id):
- """Return the shortest path from ENDING_NODE_ID to STARTING_NODE_ID.
-
- Find a path from ENDING_NODE_ID to STARTING_NODE_ID in
- REACHABLE_CHANGESETS, where STARTING_NODE_ID is the id of a
- changeset that depends on the changeset with ENDING_NODE_ID. (See
- the comment in search_for_path() for a description of the format
- of REACHABLE_CHANGESETS.)
-
- Return a list of changesets, where the 0th one has ENDING_NODE_ID
- and the last one has STARTING_NODE_ID. If there is no such path
- described in in REACHABLE_CHANGESETS, return None."""
-
- if ending_node_id not in reachable_changesets:
- return None
-
- path = [self._changeset_db[ending_node_id]]
- id = reachable_changesets[ending_node_id][1]
- while id != starting_node_id:
- path.append(self._changeset_db[id])
- id = reachable_changesets[id][1]
- path.append(self._changeset_db[starting_node_id])
- return path
-
- def search_for_path(self, starting_node_id, stop_set):
- """Search for paths to prerequisites of STARTING_NODE_ID.
-
- Try to find the shortest dependency path that causes the changeset
- with STARTING_NODE_ID to depend (directly or indirectly) on one of
- the changesets whose ids are contained in STOP_SET.
-
- We consider direct and indirect dependencies in the sense that the
- changeset can be reached by following a chain of predecessor nodes.
-
- When one of the changeset_ids in STOP_SET is found, terminate the
- search and return the path from that changeset_id to
- STARTING_NODE_ID. If no path is found to a node in STOP_SET,
- return None."""
-
- # A map {node_id : (steps, next_node_id)} where NODE_ID can be
- # reached from STARTING_NODE_ID in STEPS steps, and NEXT_NODE_ID
- # is the id of the previous node in the path. STARTING_NODE_ID is
- # only included as a key if there is a loop leading back to it.
- reachable_changesets = {}
-
- # A list of (node_id, steps) that still have to be investigated,
- # and STEPS is the number of steps to get to NODE_ID.
- open_nodes = [(starting_node_id, 0)]
- # A breadth-first search:
- while open_nodes:
- (id, steps) = open_nodes.pop(0)
- steps += 1
- node = self[id]
- for pred_id in node.pred_ids:
- # Since the search is breadth-first, we only have to set steps
- # that don't already exist.
- if pred_id not in reachable_changesets:
- reachable_changesets[pred_id] = (steps, id)
- open_nodes.append((pred_id, steps))
-
- # See if we can stop now:
- if pred_id in stop_set:
- return self._get_path(
- reachable_changesets, starting_node_id, pred_id
- )
-
- return None
-
- def consume_nopred_nodes(self):
- """Remove and yield changesets in dependency order.
-
- Each iteration, this generator yields a (changeset, time_range)
- tuple for the oldest changeset in the graph that doesn't have any
- predecessor nodes (i.e., it is ready to be committed). This is
- continued until there are no more nodes without predecessors
- (either because the graph has been emptied, or because of cycles
- in the graph).
-
- Among the changesets that are ready to be processed, the earliest
- one (according to the sorting of the TimeRange class) is yielded
- each time. (This is the order in which the changesets should be
- committed.)
-
- The graph should not be otherwise altered while this generator is
- running."""
-
- # Find a list of (node,changeset,) where the node has no
- # predecessors:
- nopred_nodes = _NoPredNodes(self._changeset_db)
- for node in self.nodes.itervalues():
- if not node.pred_ids:
- nopred_nodes.add(node)
-
- while nopred_nodes:
- (node, changeset,) = nopred_nodes.get()
- del self[node.id]
- # See if any successors are now ready for extraction:
- for succ_id in node.succ_ids:
- succ = self[succ_id]
- if not succ.pred_ids:
- nopred_nodes.add(succ)
- yield (changeset, node.time_range)
-
- def find_cycle(self, starting_node_id):
- """Find a cycle in the dependency graph and return it.
-
- Use STARTING_NODE_ID as the place to start looking. This routine
- must only be called after all nopred_nodes have been removed.
- Return the list of changesets that are involved in the cycle
- (ordered such that cycle[n-1] is a predecessor of cycle[n] and
- cycle[-1] is a predecessor of cycle[0])."""
-
- # Since there are no nopred nodes in the graph, all nodes in the
- # graph must either be involved in a cycle or depend (directly or
- # indirectly) on nodes that are in a cycle.
-
- # Pick an arbitrary node:
- node = self[starting_node_id]
-
- seen_nodes = [node]
-
- # Follow it backwards until a node is seen a second time; then we
- # have our cycle.
- while True:
- # Pick an arbitrary predecessor of node. It must exist, because
- # there are no nopred nodes:
- try:
- node_id = node.pred_ids.__iter__().next()
- except StopIteration:
- raise NoPredNodeInGraphException(node)
- node = self[node_id]
- try:
- i = seen_nodes.index(node)
- except ValueError:
- seen_nodes.append(node)
- else:
- seen_nodes = seen_nodes[i:]
- seen_nodes.reverse()
- return [self._changeset_db[node.id] for node in seen_nodes]
-
- def consume_graph(self, cycle_breaker=None):
- """Remove and yield changesets from this graph in dependency order.
-
- Each iteration, this generator yields a (changeset, time_range)
- tuple for the oldest changeset in the graph that doesn't have any
- predecessor nodes. If CYCLE_BREAKER is specified, then call
- CYCLE_BREAKER(cycle) whenever a cycle is encountered, where cycle
- is the list of changesets that are involved in the cycle (ordered
- such that cycle[n-1] is a predecessor of cycle[n] and cycle[-1] is
- a predecessor of cycle[0]). CYCLE_BREAKER should break the cycle
- in place then return.
-
- If a cycle is found and CYCLE_BREAKER was not specified, raise
- CycleInGraphException."""
-
- while True:
- for (changeset, time_range) in self.consume_nopred_nodes():
- yield (changeset, time_range)
-
- # If there are any nodes left in the graph, then there must be
- # at least one cycle. Find a cycle and process it.
-
- # This might raise StopIteration, but that indicates that the
- # graph has been fully consumed, so we just let the exception
- # escape.
- start_node_id = self.nodes.iterkeys().next()
-
- cycle = self.find_cycle(start_node_id)
-
- if cycle_breaker is not None:
- cycle_breaker(cycle)
- else:
- raise CycleInGraphException(cycle)
-
- def __repr__(self):
- """For convenience only. The format is subject to change at any time."""
-
- if self.nodes:
- return 'ChangesetGraph:\n%s' \
- % ''.join([' %r\n' % node for node in self])
- else:
- return 'ChangesetGraph:\n EMPTY\n'
-
- node_colors = {
- RevisionChangeset : 'lightgreen',
- OrderedChangeset : 'cyan',
- BranchChangeset : 'orange',
- TagChangeset : 'yellow',
- }
-
- def output_coarse_dot(self, f):
- """Output the graph in DOT format to file-like object f.
-
- Such a file can be rendered into a visual representation of the
- graph using tools like graphviz. Include only changesets in the
- graph, and the dependencies between changesets."""
-
- f.write('digraph G {\n')
- for node in self:
- f.write(
- ' C%x [style=filled, fillcolor=%s];\n' % (
- node.id,
- self.node_colors[self._changeset_db[node.id].__class__],
- )
- )
- f.write('\n')
-
- for node in self:
- for succ_id in node.succ_ids:
- f.write(' C%x -> C%x\n' % (node.id, succ_id,))
- f.write('\n')
-
- f.write('}\n')
-
- def output_fine_dot(self, f):
- """Output the graph in DOT format to file-like object f.
-
- Such a file can be rendered into a visual representation of the
- graph using tools like graphviz. Include all CVSItems and the
- CVSItem-CVSItem dependencies in the graph. Group the CVSItems
- into clusters by changeset."""
-
- f.write('digraph G {\n')
- for node in self:
- f.write(' subgraph cluster_%x {\n' % (node.id,))
- f.write(' label = "C%x";\n' % (node.id,))
- changeset = self._changeset_db[node.id]
- for item_id in changeset.cvs_item_ids:
- f.write(' I%x;\n' % (item_id,))
- f.write(' style=filled;\n')
- f.write(
- ' fillcolor=%s;\n'
- % (self.node_colors[self._changeset_db[node.id].__class__],))
- f.write(' }\n\n')
-
- for node in self:
- changeset = self._changeset_db[node.id]
- for cvs_item in changeset.iter_cvs_items():
- for succ_id in cvs_item.get_succ_ids():
- f.write(' I%x -> I%x;\n' % (cvs_item.id, succ_id,))
-
- f.write('\n')
-
- f.write('}\n')
-
-
diff --git a/cvs2svn_lib/changeset_graph_link.py b/cvs2svn_lib/changeset_graph_link.py
deleted file mode 100644
index 9d0cc9d..0000000
--- a/cvs2svn_lib/changeset_graph_link.py
+++ /dev/null
@@ -1,149 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2006-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""Keep track of counts of different types of changeset links."""
-
-
-
-# A cvs_item doesn't depend on any cvs_items in either pred or succ:
-LINK_NONE = 0
-
-# A cvs_item depends on one or more cvs_items in pred but none in succ:
-LINK_PRED = 1
-
-# A cvs_item depends on one or more cvs_items in succ but none in pred:
-LINK_SUCC = 2
-
-# A cvs_item depends on one or more cvs_items in both pred and succ:
-LINK_PASSTHRU = LINK_PRED | LINK_SUCC
-
-
-class ChangesetGraphLink(object):
- def __init__(self, pred, changeset, succ):
- """Represent a link in a loop in a changeset graph.
-
- This is the link that goes from PRED -> CHANGESET -> SUCC.
-
- We are mainly concerned with how many CVSItems have LINK_PRED,
- LINK_SUCC, and LINK_PASSTHRU type links to the neighboring
- commitsets. If necessary, this class can also break up CHANGESET
- into multiple changesets."""
-
- self.pred = pred
- self.pred_ids = set(pred.cvs_item_ids)
-
- self.changeset = changeset
-
- self.succ_ids = set(succ.cvs_item_ids)
- self.succ = succ
-
- # A count of each type of link for cvs_items in changeset
- # (indexed by LINK_* constants):
- link_counts = [0] * 4
-
- for cvs_item in list(changeset.iter_cvs_items()):
- link_counts[self.get_link_type(cvs_item)] += 1
-
- [self.pred_links, self.succ_links, self.passthru_links] = link_counts[1:]
-
- def get_link_type(self, cvs_item):
- """Return the type of links from CVS_ITEM to self.PRED and self.SUCC.
-
- The return value is one of LINK_NONE, LINK_PRED, LINK_SUCC, or
- LINK_PASSTHRU."""
-
- retval = LINK_NONE
-
- if cvs_item.get_pred_ids() & self.pred_ids:
- retval |= LINK_PRED
- if cvs_item.get_succ_ids() & self.succ_ids:
- retval |= LINK_SUCC
-
- return retval
-
- def get_links_to_move(self):
- """Return the number of items that would be moved to split changeset."""
-
- return min(self.pred_links, self.succ_links) \
- or max(self.pred_links, self.succ_links)
-
- def is_breakable(self):
- """Return True iff breaking the changeset will do any good."""
-
- return self.pred_links != 0 or self.succ_links != 0
-
- def __cmp__(self, other):
- """Compare SELF with OTHER in terms of which would be better to break.
-
- The one that is better to break is considered the lesser."""
-
- return (
- - cmp(int(self.is_breakable()), int(other.is_breakable()))
- or cmp(self.passthru_links, other.passthru_links)
- or cmp(self.get_links_to_move(), other.get_links_to_move())
- )
-
- def break_changeset(self, changeset_key_generator):
- """Break up self.changeset and return the fragments.
-
- Break it up in such a way that the link is weakened as efficiently
- as possible."""
-
- if not self.is_breakable():
- raise ValueError('Changeset is not breakable: %r' % self.changeset)
-
- pred_items = []
- succ_items = []
-
- # For each link type, should such CVSItems be moved to the
- # changeset containing the predecessor items or the one containing
- # the successor items?
- destination = {
- LINK_PRED : pred_items,
- LINK_SUCC : succ_items,
- }
-
- if self.pred_links == 0:
- destination[LINK_NONE] = pred_items
- destination[LINK_PASSTHRU] = pred_items
- elif self.succ_links == 0:
- destination[LINK_NONE] = succ_items
- destination[LINK_PASSTHRU] = succ_items
- elif self.pred_links < self.succ_links:
- destination[LINK_NONE] = succ_items
- destination[LINK_PASSTHRU] = succ_items
- else:
- destination[LINK_NONE] = pred_items
- destination[LINK_PASSTHRU] = pred_items
-
- for cvs_item in self.changeset.iter_cvs_items():
- link_type = self.get_link_type(cvs_item)
- destination[link_type].append(cvs_item.id)
-
- # Create new changesets of the same type as the old one:
- return [
- self.changeset.create_split_changeset(
- changeset_key_generator.gen_id(), pred_items),
- self.changeset.create_split_changeset(
- changeset_key_generator.gen_id(), succ_items),
- ]
-
- def __str__(self):
- return 'Link<%x>(%d, %d, %d)' % (
- self.changeset.id,
- self.pred_links, self.succ_links, self.passthru_links)
-
-
diff --git a/cvs2svn_lib/changeset_graph_node.py b/cvs2svn_lib/changeset_graph_node.py
deleted file mode 100644
index cbbebd7..0000000
--- a/cvs2svn_lib/changeset_graph_node.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2006-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""A node in the changeset dependency graph."""
-
-
-class ChangesetGraphNode(object):
- """A node in the changeset dependency graph."""
-
- __slots__ = ['id', 'time_range', 'pred_ids', 'succ_ids']
-
- def __init__(self, changeset, time_range, pred_ids, succ_ids):
- # The id of the ChangesetGraphNode is the same as the id of the
- # changeset.
- self.id = changeset.id
-
- # The range of times of CVSItems within this Changeset.
- self.time_range = time_range
-
- # The set of changeset ids of changesets that are direct
- # predecessors of this one.
- self.pred_ids = pred_ids
-
- # The set of changeset ids of changesets that are direct
- # successors of this one.
- self.succ_ids = succ_ids
-
- def __repr__(self):
- """For convenience only. The format is subject to change at any time."""
-
- return '%x; pred=[%s]; succ=[%s]' % (
- self.id,
- ','.join(['%x' % id for id in self.pred_ids]),
- ','.join(['%x' % id for id in self.succ_ids]),
- )
-
-
diff --git a/cvs2svn_lib/check_dependencies_pass.py b/cvs2svn_lib/check_dependencies_pass.py
deleted file mode 100644
index 172c264..0000000
--- a/cvs2svn_lib/check_dependencies_pass.py
+++ /dev/null
@@ -1,144 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module defines some passes that can be used for debugging cv2svn."""
-
-
-from cvs2svn_lib import config
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.common import FatalException
-from cvs2svn_lib.common import DB_OPEN_READ
-from cvs2svn_lib.log import Log
-from cvs2svn_lib.pass_manager import Pass
-from cvs2svn_lib.project import read_projects
-from cvs2svn_lib.artifact_manager import artifact_manager
-from cvs2svn_lib.cvs_file_database import CVSFileDatabase
-from cvs2svn_lib.symbol_database import SymbolDatabase
-from cvs2svn_lib.cvs_item_database import OldCVSItemStore
-from cvs2svn_lib.cvs_item_database import IndexedCVSItemStore
-
-
-class CheckDependenciesPass(Pass):
- """Check that the dependencies are self-consistent."""
-
- def __init__(self):
- Pass.__init__(self)
-
- def register_artifacts(self):
- self._register_temp_file_needed(config.PROJECTS)
- self._register_temp_file_needed(config.SYMBOL_DB)
- self._register_temp_file_needed(config.CVS_FILES_DB)
-
- def iter_cvs_items(self):
- raise NotImplementedError()
-
- def get_cvs_item(self, item_id):
- raise NotImplementedError()
-
- def run(self, run_options, stats_keeper):
- Ctx()._projects = read_projects(
- artifact_manager.get_temp_file(config.PROJECTS)
- )
- Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
- self.symbol_db = SymbolDatabase()
- Ctx()._symbol_db = self.symbol_db
-
- Log().quiet("Checking dependency consistency...")
-
- fatal_errors = []
- for cvs_item in self.iter_cvs_items():
- # Check that the pred_ids and succ_ids are mutually consistent:
- for pred_id in cvs_item.get_pred_ids():
- pred = self.get_cvs_item(pred_id)
- if not cvs_item.id in pred.get_succ_ids():
- fatal_errors.append(
- '%s lists pred=%s, but not vice versa.' % (cvs_item, pred,))
-
- for succ_id in cvs_item.get_succ_ids():
- succ = self.get_cvs_item(succ_id)
- if not cvs_item.id in succ.get_pred_ids():
- fatal_errors.append(
- '%s lists succ=%s, but not vice versa.' % (cvs_item, succ,))
-
- if fatal_errors:
- raise FatalException(
- 'Dependencies inconsistent:\n'
- '%s\n'
- 'Exited due to fatal error(s).'
- % ('\n'.join(fatal_errors),)
- )
-
- self.symbol_db.close()
- self.symbol_db = None
- Ctx()._cvs_file_db.close()
- Log().quiet("Done")
-
-
-class CheckItemStoreDependenciesPass(CheckDependenciesPass):
- def __init__(self, cvs_items_store_file):
- CheckDependenciesPass.__init__(self)
- self.cvs_items_store_file = cvs_items_store_file
-
- def register_artifacts(self):
- CheckDependenciesPass.register_artifacts(self)
- self._register_temp_file_needed(self.cvs_items_store_file)
-
- def iter_cvs_items(self):
- cvs_item_store = OldCVSItemStore(
- artifact_manager.get_temp_file(self.cvs_items_store_file))
-
- for cvs_file_items in cvs_item_store.iter_cvs_file_items():
- self.current_cvs_file_items = cvs_file_items
- for cvs_item in cvs_file_items.values():
- yield cvs_item
-
- del self.current_cvs_file_items
-
- cvs_item_store.close()
-
- def get_cvs_item(self, item_id):
- return self.current_cvs_file_items[item_id]
-
-
-class CheckIndexedItemStoreDependenciesPass(CheckDependenciesPass):
- def __init__(self, cvs_items_store_file, cvs_items_store_index_file):
- CheckDependenciesPass.__init__(self)
- self.cvs_items_store_file = cvs_items_store_file
- self.cvs_items_store_index_file = cvs_items_store_index_file
-
- def register_artifacts(self):
- CheckDependenciesPass.register_artifacts(self)
- self._register_temp_file_needed(self.cvs_items_store_file)
- self._register_temp_file_needed(self.cvs_items_store_index_file)
-
- def iter_cvs_items(self):
- return self.cvs_item_store.itervalues()
-
- def get_cvs_item(self, item_id):
- return self.cvs_item_store[item_id]
-
- def run(self, run_options, stats_keeper):
- self.cvs_item_store = IndexedCVSItemStore(
- artifact_manager.get_temp_file(self.cvs_items_store_file),
- artifact_manager.get_temp_file(self.cvs_items_store_index_file),
- DB_OPEN_READ)
-
- CheckDependenciesPass.run(self, run_options, stats_keeper)
-
- self.cvs_item_store.close()
- self.cvs_item_store = None
-
-
diff --git a/cvs2svn_lib/checkout_internal.py b/cvs2svn_lib/checkout_internal.py
deleted file mode 100644
index fe28e0c..0000000
--- a/cvs2svn_lib/checkout_internal.py
+++ /dev/null
@@ -1,778 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2007-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains classes that implement the --use-internal-co option.
-
-The idea is to patch up the revisions' contents incrementally, thus
-avoiding the huge number of process spawns and the O(n^2) overhead of
-using 'co' and 'cvs'.
-
-InternalRevisionRecorder saves the RCS deltas and RCS revision trees
-to databases. Notably, deltas from the trunk need to be reversed, as
-CVS stores them so they apply from HEAD backwards.
-
-InternalRevisionExcluder copies the revision trees to a new database,
-omitting excluded branches.
-
-InternalRevisionReader produces the revisions' contents on demand. To
-generate the text for a typical revision, we need the revision's delta
-text plus the fulltext of the previous revision. Therefore, we
-maintain a checkout database containing a copy of the fulltext of any
-revision for which subsequent revisions still need to be retrieved.
-It is crucial to remove text from this database as soon as it is no
-longer needed, to prevent it from growing enormous.
-
-There are two reasons that the text from a revision can be needed: (1)
-because the revision itself still needs to be output to a dumpfile;
-(2) because another revision needs it as the base of its delta. We
-maintain a reference count for each revision, which includes *both*
-possibilities. The first time a revision's text is needed, it is
-generated by applying the revision's deltatext to the previous
-revision's fulltext, and the resulting fulltext is stored in the
-checkout database. Each time a revision's fulltext is retrieved, its
-reference count is decremented. When the reference count goes to
-zero, then the fulltext is deleted from the checkout database.
-
-The administrative data for managing this consists of one TextRecord
-entry for each revision. Each TextRecord has an id, which is the same
-id as used for the corresponding CVSRevision instance. It also
-maintains a count of the times it is expected to be retrieved.
-TextRecords come in several varieties:
-
-FullTextRecord -- Used for revisions whose fulltext is contained
- directly in the RCS file, and therefore available during
- CollectRevsPass (i.e., typically revision 1.1 of each file).
-
-DeltaTextRecord -- Used for revisions that are defined via a delta
- relative to some other TextRecord. These records record the id of
- the TextRecord that holds the base text against which the delta is
- defined. When the text for a DeltaTextRecord is retrieved, the
- DeltaTextRecord instance is deleted and a CheckedOutTextRecord
- instance is created to take its place.
-
-CheckedOutTextRecord -- Used during OutputPass for a revision that
- started out as a DeltaTextRecord, but has already been retrieved
- (and therefore its fulltext is stored in the checkout database).
-
-While a file is being processed during CollectRevsPass, the fulltext
-and deltas are stored to the delta database, and TextRecord instances
-are created to keep track of things. The reference counts are all
-initialized to zero.
-
-After CollectRevsPass has done any preliminary tree mangling, its
-_FileDataCollector.parse_completed(), method calls
-RevisionRecorder.finish_file(), passing it the CVSFileItems instance
-that describes the revisions in the file. At this point the reference
-counts for the file's TextRecords are updated: each record referred to
-by a delta has its refcount incremented, and each record that
-corresponds to a non-delete CVSRevision is incremented. After that,
-any records with refcount==0 are removed. When one record is removed,
-that can cause another record's reference count to go to zero and be
-removed too, recursively. When a TextRecord is deleted at this stage,
-its deltatext is also deleted from the delta database.
-
-In FilterSymbolsPass, the exact same procedure (described in the
-previous paragraph) is repeated, but this time using the CVSFileItems
-after it has been updated for excluded symbols, symbol
-preferred-parent grafting, etc."""
-
-
-import cStringIO
-import re
-import time
-
-from cvs2svn_lib import config
-from cvs2svn_lib.common import DB_OPEN_NEW
-from cvs2svn_lib.common import DB_OPEN_READ
-from cvs2svn_lib.common import warning_prefix
-from cvs2svn_lib.common import FatalError
-from cvs2svn_lib.common import InternalError
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.log import Log
-from cvs2svn_lib.artifact_manager import artifact_manager
-from cvs2svn_lib.symbol import Trunk
-from cvs2svn_lib.cvs_item import CVSRevisionModification
-from cvs2svn_lib.database import Database
-from cvs2svn_lib.database import IndexedDatabase
-from cvs2svn_lib.rcs_stream import RCSStream
-from cvs2svn_lib.rcs_stream import MalformedDeltaException
-from cvs2svn_lib.revision_manager import RevisionRecorder
-from cvs2svn_lib.revision_manager import RevisionExcluder
-from cvs2svn_lib.revision_manager import RevisionReader
-from cvs2svn_lib.serializer import MarshalSerializer
-from cvs2svn_lib.serializer import CompressingSerializer
-from cvs2svn_lib.serializer import PrimedPickleSerializer
-
-
-class TextRecord(object):
- """Bookkeeping data for the text of a single CVSRevision."""
-
- __slots__ = ['id', 'refcount']
-
- def __init__(self, id):
- # The cvs_rev_id of the revision whose text this is.
- self.id = id
-
- # The number of times that the text of this revision will be
- # retrieved.
- self.refcount = 0
-
- def __getstate__(self):
- return (self.id, self.refcount,)
-
- def __setstate__(self, state):
- (self.id, self.refcount,) = state
-
- def increment_dependency_refcounts(self, text_record_db):
- """Increment the refcounts of any records that this one depends on."""
-
- pass
-
- def decrement_refcount(self, text_record_db):
- """Decrement the number of times our text still has to be checked out.
-
- If the reference count goes to zero, call discard()."""
-
- self.refcount -= 1
- if self.refcount == 0:
- text_record_db.discard(self.id)
-
- def checkout(self, text_record_db):
- """Workhorse of the checkout process.
-
- Return the text for this revision, decrement our reference count,
- and update the databases depending on whether there will be future
- checkouts."""
-
- raise NotImplementedError()
-
- def free(self, text_record_db):
- """This instance will never again be checked out; free it.
-
- Also free any associated resources and decrement the refcounts of
- any other TextRecords that this one depends on."""
-
- raise NotImplementedError()
-
-
-class FullTextRecord(TextRecord):
- __slots__ = []
-
- def __getstate__(self):
- return (self.id, self.refcount,)
-
- def __setstate__(self, state):
- (self.id, self.refcount,) = state
-
- def checkout(self, text_record_db):
- text = text_record_db.delta_db[self.id]
- self.decrement_refcount(text_record_db)
- return text
-
- def free(self, text_record_db):
- del text_record_db.delta_db[self.id]
-
- def __str__(self):
- return 'FullTextRecord(%x, %d)' % (self.id, self.refcount,)
-
-
-class DeltaTextRecord(TextRecord):
- __slots__ = ['pred_id']
-
- def __init__(self, id, pred_id):
- TextRecord.__init__(self, id)
-
- # The cvs_rev_id of the revision relative to which this delta is
- # defined.
- self.pred_id = pred_id
-
- def __getstate__(self):
- return (self.id, self.refcount, self.pred_id,)
-
- def __setstate__(self, state):
- (self.id, self.refcount, self.pred_id,) = state
-
- def increment_dependency_refcounts(self, text_record_db):
- text_record_db[self.pred_id].refcount += 1
-
- def checkout(self, text_record_db):
- base_text = text_record_db[self.pred_id].checkout(text_record_db)
- co = RCSStream(base_text)
- delta_text = text_record_db.delta_db[self.id]
- co.apply_diff(delta_text)
- text = co.get_text()
- del co
- self.refcount -= 1
- if self.refcount == 0:
- # This text will never be needed again; just delete ourselves
- # without ever having stored the fulltext to the checkout
- # database:
- del text_record_db[self.id]
- else:
- # Store a new CheckedOutTextRecord in place of ourselves:
- text_record_db.checkout_db['%x' % self.id] = text
- new_text_record = CheckedOutTextRecord(self.id)
- new_text_record.refcount = self.refcount
- text_record_db.replace(new_text_record)
- return text
-
- def free(self, text_record_db):
- del text_record_db.delta_db[self.id]
- text_record_db[self.pred_id].decrement_refcount(text_record_db)
-
- def __str__(self):
- return 'DeltaTextRecord(%x -> %x, %d)' \
- % (self.pred_id, self.id, self.refcount,)
-
-
-class CheckedOutTextRecord(TextRecord):
- __slots__ = []
-
- def __getstate__(self):
- return (self.id, self.refcount,)
-
- def __setstate__(self, state):
- (self.id, self.refcount,) = state
-
- def checkout(self, text_record_db):
- text = text_record_db.checkout_db['%x' % self.id]
- self.decrement_refcount(text_record_db)
- return text
-
- def free(self, text_record_db):
- del text_record_db.checkout_db['%x' % self.id]
-
- def __str__(self):
- return 'CheckedOutTextRecord(%x, %d)' % (self.id, self.refcount,)
-
-
-class NullDatabase(object):
- """A do-nothing database that can be used with TextRecordDatabase.
-
- Use this when you don't actually want to allow anything to be
- deleted."""
-
- def __delitem__(self, id):
- pass
-
-
-class TextRecordDatabase:
- """Holds the TextRecord instances that are currently live.
-
- During CollectRevsPass and FilterSymbolsPass, files are processed
- one by one and a new TextRecordDatabase instance is used for each
- file. During OutputPass, a single TextRecordDatabase instance is
- used for the duration of OutputPass; individual records are added
- and removed when they are active."""
-
- def __init__(self, delta_db, checkout_db):
- # A map { cvs_rev_id -> TextRecord }.
- self.text_records = {}
-
- # A database-like object using cvs_rev_ids as keys and containing
- # fulltext/deltatext strings as values. Its __getitem__() method
- # is used to retrieve deltas when they are needed, and its
- # __delitem__() method is used to delete deltas when they can be
- # freed. The modifiability of the delta database varies from pass
- # to pass, so the object stored here varies as well:
- #
- # CollectRevsPass: a fully-functional IndexedDatabase. This
- # allows deltas that will not be needed to be deleted.
- #
- # FilterSymbolsPass: a NullDatabase. The delta database cannot be
- # modified during this pass, and we have no need to retrieve
- # deltas, so we just use a dummy object here.
- #
- # OutputPass: a disabled IndexedDatabase. During this pass we
- # need to retrieve deltas, but we are not allowed to modify the
- # delta database. So we use an IndexedDatabase whose __del__()
- # method has been disabled to do nothing.
- self.delta_db = delta_db
-
- # A database-like object using cvs_rev_ids as keys and containing
- # fulltext strings as values. This database is only set during
- # OutputPass.
- self.checkout_db = checkout_db
-
- # If this is set to a list, then the list holds the ids of
- # text_records that have to be deleted; when discard() is called,
- # it adds the requested id to the list but does not delete it. If
- # this member is set to None, then text_records are deleted
- # immediately when discard() is called.
- self.deferred_deletes = None
-
- def __getstate__(self):
- return (self.text_records.values(),)
-
- def __setstate__(self, state):
- (text_records,) = state
- self.text_records = {}
- for text_record in text_records:
- self.add(text_record)
- self.delta_db = NullDatabase()
- self.checkout_db = NullDatabase()
- self.deferred_deletes = None
-
- def add(self, text_record):
- """Add TEXT_RECORD to our database.
-
- There must not already be a record with the same id."""
-
- assert not self.text_records.has_key(text_record.id)
-
- self.text_records[text_record.id] = text_record
-
- def __getitem__(self, id):
- return self.text_records[id]
-
- def __delitem__(self, id):
- """Free the record with the specified ID."""
-
- del self.text_records[id]
-
- def replace(self, text_record):
- """Store TEXT_RECORD in place of the existing record with the same id.
-
- Do not do anything with the old record."""
-
- assert self.text_records.has_key(text_record.id)
- self.text_records[text_record.id] = text_record
-
- def discard(self, *ids):
- """The text records with IDS are no longer needed; discard them.
-
- This involves calling their free() methods and also removing them
- from SELF.
-
- If SELF.deferred_deletes is not None, then the ids to be deleted
- are added to the list instead of deleted immediately. This
- mechanism is to prevent a stack overflow from the avalanche of
- deletes that can result from deleting a long chain of revisions."""
-
- if self.deferred_deletes is None:
- # This is an outer-level delete.
- self.deferred_deletes = list(ids)
- while self.deferred_deletes:
- id = self.deferred_deletes.pop()
- text_record = self[id]
- if text_record.refcount != 0:
- raise InternalError(
- 'TextRecordDatabase.discard(%s) called with refcount = %d'
- % (text_record, text_record.refcount,)
- )
- # This call might cause other text_record ids to be added to
- # self.deferred_deletes:
- text_record.free(self)
- del self[id]
- self.deferred_deletes = None
- else:
- self.deferred_deletes.extend(ids)
-
- def itervalues(self):
- return self.text_records.itervalues()
-
- def recompute_refcounts(self, cvs_file_items):
- """Recompute the refcounts of the contained TextRecords.
-
- Use CVS_FILE_ITEMS to determine which records will be needed by
- cvs2svn."""
-
- # First clear all of the refcounts:
- for text_record in self.itervalues():
- text_record.refcount = 0
-
- # Now increment the reference count of records that are needed as
- # the source of another record's deltas:
- for text_record in self.itervalues():
- text_record.increment_dependency_refcounts(self.text_records)
-
- # Now increment the reference count of records that will be needed
- # by cvs2svn:
- for lod_items in cvs_file_items.iter_lods():
- for cvs_rev in lod_items.cvs_revisions:
- if isinstance(cvs_rev, CVSRevisionModification):
- self[cvs_rev.id].refcount += 1
-
- def free_unused(self):
- """Free any TextRecords whose reference counts are zero."""
-
- # The deletion of some of these text records might cause others to
- # be unused, in which case they will be deleted automatically.
- # But since the initially-unused records are not referred to by
- # any others, we don't have to be afraid that they will be deleted
- # before we get to them. But it *is* crucial that we create the
- # whole unused list before starting the loop.
-
- unused = [
- text_record.id
- for text_record in self.itervalues()
- if text_record.refcount == 0
- ]
-
- self.discard(*unused)
-
- def log_leftovers(self):
- """If any TextRecords still exist, log them."""
-
- if self.text_records:
- Log().warn(
- "%s: internal problem: leftover revisions in the checkout cache:"
- % warning_prefix)
- for text_record in self.itervalues():
- Log().warn(' %s' % (text_record,))
-
- def __repr__(self):
- """Debugging output of the current contents of the TextRecordDatabase."""
-
- retval = ['TextRecordDatabase:']
- for text_record in self.itervalues():
- retval.append(' %s' % (text_record,))
- return '\n'.join(retval)
-
-
-class InternalRevisionRecorder(RevisionRecorder):
- """A RevisionRecorder that reconstructs the fulltext internally."""
-
- def __init__(self, compress):
- RevisionRecorder.__init__(self)
- self._compress = compress
-
- def register_artifacts(self, which_pass):
- artifact_manager.register_temp_file(
- config.RCS_DELTAS_INDEX_TABLE, which_pass
- )
- artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass)
- artifact_manager.register_temp_file(
- config.RCS_TREES_INDEX_TABLE, which_pass
- )
- artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass)
-
- def start(self):
- ser = MarshalSerializer()
- if self._compress:
- ser = CompressingSerializer(ser)
- self._rcs_deltas = IndexedDatabase(
- artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
- artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
- DB_OPEN_NEW, ser)
- primer = (FullTextRecord, DeltaTextRecord)
- self._rcs_trees = IndexedDatabase(
- artifact_manager.get_temp_file(config.RCS_TREES_STORE),
- artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
- DB_OPEN_NEW, PrimedPickleSerializer(primer))
-
- def start_file(self, cvs_file_items):
- self._cvs_file_items = cvs_file_items
-
- # A map from cvs_rev_id to TextRecord instance:
- self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase())
-
- def record_text(self, cvs_rev, log, text):
- if isinstance(cvs_rev.lod, Trunk):
- # On trunk, revisions are encountered in reverse order (1.<N>
- # ... 1.1) and deltas are inverted. The first text that we see
- # is the fulltext for the HEAD revision. After that, the text
- # corresponding to revision 1.N is the delta (1.<N+1> ->
- # 1.<N>)). We have to invert the deltas here so that we can
- # read the revisions out in dependency order; that is, for
- # revision 1.1 we want the fulltext, and for revision 1.<N> we
- # want the delta (1.<N-1> -> 1.<N>). This means that we can't
- # compute the delta for a revision until we see its logical
- # parent. When we finally see revision 1.1 (which is recognized
- # because it doesn't have a parent), we can record the diff (1.1
- # -> 1.2) for revision 1.2, and also the fulltext for 1.1.
-
- if cvs_rev.next_id is None:
- # This is HEAD, as fulltext. Initialize the RCSStream so
- # that we can compute deltas backwards in time.
- self._stream = RCSStream(text)
- else:
- # Any other trunk revision is a backward delta. Apply the
- # delta to the RCSStream to mutate it to the contents of this
- # revision, and also to get the reverse delta, which we store
- # as the forward delta of our child revision.
- try:
- text = self._stream.invert_diff(text)
- except MalformedDeltaException, (msg):
- Log().error('Malformed RCS delta in %s, revision %s: %s'
- % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev,
- msg))
- raise RuntimeError
- text_record = DeltaTextRecord(cvs_rev.next_id, cvs_rev.id)
- self._writeout(text_record, text)
-
- if cvs_rev.prev_id is None:
- # This is revision 1.1. Write its fulltext:
- text_record = FullTextRecord(cvs_rev.id)
- self._writeout(text_record, self._stream.get_text())
-
- # There will be no more trunk revisions delivered, so free the
- # RCSStream.
- del self._stream
-
- else:
- # On branches, revisions are encountered in logical order
- # (<BRANCH>.1 ... <BRANCH>.<N>) and the text corresponding to
- # revision <BRANCH>.<N> is the forward delta (<BRANCH>.<N-1> ->
- # <BRANCH>.<N>). That's what we need, so just store it.
-
- # FIXME: It would be nice to avoid writing out branch deltas
- # when --trunk-only. (They will be deleted when finish_file()
- # is called, but if the delta db is in an IndexedDatabase the
- # deletions won't actually recover any disk space.)
- text_record = DeltaTextRecord(cvs_rev.id, cvs_rev.prev_id)
- self._writeout(text_record, text)
-
- return None
-
- def _writeout(self, text_record, text):
- self.text_record_db.add(text_record)
- self._rcs_deltas[text_record.id] = text
-
- def finish_file(self, cvs_file_items):
- """Finish processing of the current file.
-
- Compute the initial text record refcounts, discard any records
- that are unneeded, and store the text records for the file to the
- _rcs_trees database."""
-
- # Delete our copy of the preliminary CVSFileItems:
- del self._cvs_file_items
-
- self.text_record_db.recompute_refcounts(cvs_file_items)
- self.text_record_db.free_unused()
- self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
- del self.text_record_db
-
- def finish(self):
- self._rcs_deltas.close()
- self._rcs_trees.close()
-
-
-class InternalRevisionExcluder(RevisionExcluder):
- """The RevisionExcluder used by InternalRevisionReader."""
-
- def register_artifacts(self, which_pass):
- artifact_manager.register_temp_file_needed(
- config.RCS_TREES_STORE, which_pass
- )
- artifact_manager.register_temp_file_needed(
- config.RCS_TREES_INDEX_TABLE, which_pass
- )
- artifact_manager.register_temp_file(
- config.RCS_TREES_FILTERED_STORE, which_pass
- )
- artifact_manager.register_temp_file(
- config.RCS_TREES_FILTERED_INDEX_TABLE, which_pass
- )
-
- def start(self):
- self._tree_db = IndexedDatabase(
- artifact_manager.get_temp_file(config.RCS_TREES_STORE),
- artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
- DB_OPEN_READ)
- primer = (FullTextRecord, DeltaTextRecord)
- self._new_tree_db = IndexedDatabase(
- artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_STORE),
- artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE),
- DB_OPEN_NEW, PrimedPickleSerializer(primer))
-
- def process_file(self, cvs_file_items):
- text_record_db = self._tree_db[cvs_file_items.cvs_file.id]
- text_record_db.recompute_refcounts(cvs_file_items)
- text_record_db.free_unused()
- self._new_tree_db[cvs_file_items.cvs_file.id] = text_record_db
-
- def finish(self):
- self._tree_db.close()
- self._new_tree_db.close()
-
-
-class _KeywordExpander:
- """A class whose instances provide substitutions for CVS keywords.
-
- This class is used via its __call__() method, which should be called
- with a match object representing a match for a CVS keyword string.
- The method returns the replacement for the matched text.
-
- The __call__() method works by calling the method with the same name
- as that of the CVS keyword (converted to lower case).
-
- Instances of this class can be passed as the REPL argument to
- re.sub()."""
-
- date_fmt_old = "%Y/%m/%d %H:%M:%S" # CVS 1.11, rcs
- date_fmt_new = "%Y-%m-%d %H:%M:%S" # CVS 1.12
-
- date_fmt = date_fmt_new
-
- @classmethod
- def use_old_date_format(klass):
- """Class method to ensure exact compatibility with CVS 1.11
- output. Use this if you want to verify your conversion and you're
- using CVS 1.11."""
- klass.date_fmt = klass.date_fmt_old
-
- def __init__(self, cvs_rev):
- self.cvs_rev = cvs_rev
-
- def __call__(self, match):
- return '$%s: %s $' % \
- (match.group(1), getattr(self, match.group(1).lower())(),)
-
- def author(self):
- return Ctx()._metadata_db[self.cvs_rev.metadata_id].original_author
-
- def date(self):
- return time.strftime(self.date_fmt,
- time.gmtime(self.cvs_rev.timestamp))
-
- def header(self):
- return '%s %s %s %s Exp' % \
- (self.source(), self.cvs_rev.rev, self.date(), self.author())
-
- def id(self):
- return '%s %s %s %s Exp' % \
- (self.rcsfile(), self.cvs_rev.rev, self.date(), self.author())
-
- def locker(self):
- # Handle kvl like kv, as a converted repo is supposed to have no
- # locks.
- return ''
-
- def log(self):
- # Would need some special handling.
- return 'not supported by cvs2svn'
-
- def name(self):
- # Cannot work, as just creating a new symbol does not check out
- # the revision again.
- return 'not supported by cvs2svn'
-
- def rcsfile(self):
- return self.cvs_rev.cvs_file.basename + ",v"
-
- def revision(self):
- return self.cvs_rev.rev
-
- def source(self):
- project = self.cvs_rev.cvs_file.project
- return project.cvs_repository_root + '/' + project.cvs_module + \
- self.cvs_rev.cvs_file.cvs_path + ",v"
-
- def state(self):
- # We check out only live revisions.
- return 'Exp'
-
-
-class InternalRevisionReader(RevisionReader):
- """A RevisionReader that reads the contents from an own delta store."""
-
- _kws = 'Author|Date|Header|Id|Locker|Log|Name|RCSfile|Revision|Source|State'
- _kw_re = re.compile(r'\$(' + _kws + r'):[^$\n]*\$')
- _kwo_re = re.compile(r'\$(' + _kws + r')(:[^$\n]*)?\$')
-
- def __init__(self, compress):
- self._compress = compress
-
- def register_artifacts(self, which_pass):
- artifact_manager.register_temp_file(config.CVS_CHECKOUT_DB, which_pass)
- artifact_manager.register_temp_file_needed(
- config.RCS_DELTAS_STORE, which_pass
- )
- artifact_manager.register_temp_file_needed(
- config.RCS_DELTAS_INDEX_TABLE, which_pass
- )
- artifact_manager.register_temp_file_needed(
- config.RCS_TREES_FILTERED_STORE, which_pass
- )
- artifact_manager.register_temp_file_needed(
- config.RCS_TREES_FILTERED_INDEX_TABLE, which_pass
- )
-
- def start(self):
- self._delta_db = IndexedDatabase(
- artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
- artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
- DB_OPEN_READ)
- self._delta_db.__delitem__ = lambda id: None
- self._tree_db = IndexedDatabase(
- artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_STORE),
- artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE),
- DB_OPEN_READ)
- ser = MarshalSerializer()
- if self._compress:
- ser = CompressingSerializer(ser)
- self._co_db = Database(
- artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB), DB_OPEN_NEW,
- ser)
-
- # The set of CVSFile instances whose TextRecords have already been
- # read:
- self._loaded_files = set()
-
- # A map { CVSFILE : _FileTree } for files that currently have live
- # revisions:
- self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db)
-
- def _get_text_record(self, cvs_rev):
- """Return the TextRecord instance for CVS_REV.
-
- If the TextRecords for CVS_REV.cvs_file haven't been loaded yet,
- do so now."""
-
- if cvs_rev.cvs_file not in self._loaded_files:
- for text_record in self._tree_db[cvs_rev.cvs_file.id].itervalues():
- self._text_record_db.add(text_record)
- self._loaded_files.add(cvs_rev.cvs_file)
-
- return self._text_record_db[cvs_rev.id]
-
- def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False):
- """Check out the text for revision C_REV from the repository.
-
- Return the text wrapped in a readable file object. If
- SUPPRESS_KEYWORD_SUBSTITUTION is True, any RCS keywords will be
- _un_expanded prior to returning the file content. Note that $Log$
- never actually generates a log (which makes test 'requires_cvs()'
- fail).
-
- Revisions may be requested in any order, but if they are not
- requested in dependency order the checkout database will become
- very large. Revisions may be skipped. Each revision may be
- requested only once."""
-
- try:
- text = self._get_text_record(cvs_rev).checkout(self._text_record_db)
- except MalformedDeltaException, (msg):
- raise FatalError('Malformed RCS delta in %s, revision %s: %s'
- % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg))
- if cvs_rev.cvs_file.mode != 'b' and cvs_rev.cvs_file.mode != 'o':
- if suppress_keyword_substitution or cvs_rev.cvs_file.mode == 'k':
- text = self._kw_re.sub(r'$\1$', text)
- else:
- text = self._kwo_re.sub(_KeywordExpander(cvs_rev), text)
-
- return cStringIO.StringIO(text)
-
- def finish(self):
- self._text_record_db.log_leftovers()
-
- del self._text_record_db
- self._delta_db.close()
- self._tree_db.close()
- self._co_db.close()
-
diff --git a/cvs2svn_lib/collect_data.py b/cvs2svn_lib/collect_data.py
deleted file mode 100644
index 160d7b9..0000000
--- a/cvs2svn_lib/collect_data.py
+++ /dev/null
@@ -1,1431 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""Data collection classes.
-
-This module contains the code used to collect data from the CVS
-repository. It parses *,v files, recording all useful information
-except for the actual file contents (though even the file contents
-might be recorded by the RevisionRecorder if one is configured).
-
-As a *,v file is parsed, the information pertaining to the file is
-accumulated in memory, mostly in _RevisionData, _BranchData, and
-_TagData objects. When parsing is complete, a final pass is made over
-the data to create some final dependency links, collect statistics,
-etc., then the _*Data objects are converted into CVSItem objects
-(CVSRevision, CVSBranch, and CVSTag respectively) and the CVSItems are
-dumped into databases.
-
-During the data collection, persistent unique ids are allocated to
-many types of objects: CVSFile, Symbol, and CVSItems. CVSItems are a
-special case. CVSItem ids are unique across all CVSItem types, and
-the ids are carried over from the corresponding data collection
-objects:
-
- _RevisionData -> CVSRevision
-
- _BranchData -> CVSBranch
-
- _TagData -> CVSTag
-
-In a later pass it is possible to convert tags <-> branches. But even
-if this occurs, the new branch or tag uses the same id as the old tag
-or branch.
-
-"""
-
-
-import os
-import stat
-import re
-
-from cvs2svn_lib import config
-from cvs2svn_lib.common import DB_OPEN_NEW
-from cvs2svn_lib.common import FatalError
-from cvs2svn_lib.common import warning_prefix
-from cvs2svn_lib.common import error_prefix
-from cvs2svn_lib.common import IllegalSVNPathError
-from cvs2svn_lib.common import verify_svn_filename_legal
-from cvs2svn_lib.log import Log
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.artifact_manager import artifact_manager
-from cvs2svn_lib.project import FileInAndOutOfAtticException
-from cvs2svn_lib.cvs_file import CVSPath
-from cvs2svn_lib.cvs_file import CVSDirectory
-from cvs2svn_lib.cvs_file import CVSFile
-from cvs2svn_lib.symbol import Symbol
-from cvs2svn_lib.symbol import Trunk
-from cvs2svn_lib.cvs_item import CVSRevision
-from cvs2svn_lib.cvs_item import CVSBranch
-from cvs2svn_lib.cvs_item import CVSTag
-from cvs2svn_lib.cvs_item import cvs_revision_type_map
-from cvs2svn_lib.cvs_file_items import VendorBranchError
-from cvs2svn_lib.cvs_file_items import CVSFileItems
-from cvs2svn_lib.key_generator import KeyGenerator
-from cvs2svn_lib.cvs_item_database import NewCVSItemStore
-from cvs2svn_lib.symbol_statistics import SymbolStatisticsCollector
-from cvs2svn_lib.metadata_database import MetadataDatabase
-from cvs2svn_lib.metadata_database import MetadataLogger
-
-import cvs2svn_rcsparse
-
-
-# A regular expression defining "valid" revision numbers (used to
-# check that symbol definitions are reasonable).
-_valid_revision_re = re.compile(r'''
- ^
- (?:\d+\.)+ # Digit groups with trailing dots
- \d+ # And the last digit group.
- $
- ''', re.VERBOSE)
-
-# A regular expression matching a revision number that consists of a
-# nonzero even number of digit groups, an optional extra '0' group,
-# and a final digit group. Group 1 is the leading part (including its
-# trailing dot) and group 2 is the final digit group, so substituting
-# r'\1\2' removes the extra '0' group (e.g., '1.7.0.2' -> '1.7.2').
-_branch_revision_re = re.compile(r'''
- ^
- ((?:\d+\.\d+\.)+) # A nonzero even number of digit groups w/trailing dot
- (?:0\.)? # CVS sticks an extra 0 here; RCS does not
- (\d+) # And the last digit group
- $
- ''', re.VERBOSE)
-
-
-def rev_tuple(rev):
- """Return a tuple of integers corresponding to revision number REV.
-
- For example, if REV is '1.2.3.4', then return (1,2,3,4)."""
-
- return tuple([int(x) for x in rev.split('.')])
-
-
-def is_trunk_revision(rev):
- """Return True iff REV is a trunk revision.
-
- REV is a revision number corresponding to a specific revision (i.e.,
- not a whole branch)."""
-
- return rev.count('.') == 1
-
-
-def is_branch_revision_number(rev):
- """Return True iff REV is a branch revision number.
-
- REV is a CVS revision number in canonical form (i.e., with zeros
- removed). Return True iff it refers to a whole branch, as opposed
- to a single revision."""
-
- return rev.count('.') % 2 == 0
-
-
-def is_same_line_of_development(rev1, rev2):
- """Return True if rev1 and rev2 are on the same line of
- development (i.e., both on trunk, or both on the same branch);
- return False otherwise. Either rev1 or rev2 can be None, in
- which case automatically return False."""
-
- if rev1 is None or rev2 is None:
- return False
- if rev1.count('.') == 1 and rev2.count('.') == 1:
- return True
- if rev1[0:rev1.rfind('.')] == rev2[0:rev2.rfind('.')]:
- return True
- return False
-
-
-class _RevisionData:
- """We track the state of each revision so that in set_revision_info,
- we can determine if our op is an add/change/delete. We can do this
- because in set_revision_info, we'll have all of the _RevisionData
- for a file at our fingertips, and we need to examine the state of
- our prev_rev to determine if we're an add or a change. Without the
- state of the prev_rev, we are unable to distinguish between an add
- and a change."""
-
- def __init__(self, cvs_rev_id, rev, timestamp, author, state):
- # The id of this revision:
- self.cvs_rev_id = cvs_rev_id
- self.rev = rev
- self.timestamp = timestamp
- self.author = author
- self.original_timestamp = timestamp
- self.state = state
-
- # If this is the first revision on a branch, then this is the
- # branch_data of that branch; otherwise it is None.
- self.parent_branch_data = None
-
- # The revision number of the parent of this revision along the
- # same line of development, if any. For the first revision R on a
- # branch, we consider the revision from which R sprouted to be the
- # 'parent'. If this is the root revision in the file's revision
- # tree, then this field is None.
- #
- # Note that this revision can't be determined arithmetically (due
- # to cvsadmin -o), which is why this field is necessary.
- self.parent = None
-
- # The revision number of the primary child of this revision (the
- # child along the same line of development), if any; otherwise,
- # None.
- self.child = None
-
- # The _BranchData instances of branches that sprout from this
- # revision, sorted in ascending order by branch number. It would
- # be inconvenient to initialize it here because we would have to
- # scan through all branches known by the _SymbolDataCollector to
- # find the ones having us as the parent. Instead, this
- # information is filled in by
- # _FileDataCollector._resolve_dependencies() and sorted by
- # _FileDataCollector._sort_branches().
- self.branches_data = []
-
- # The revision numbers of the first commits on any branches on
- # which commits occurred. This dependency is kept explicitly
- # because otherwise a revision-only topological sort would miss
- # the dependency that exists via branches_data.
- self.branches_revs_data = []
-
- # The _TagData instances of tags that are connected to this
- # revision.
- self.tags_data = []
-
- # A token that may be returned from
- # RevisionRecorder.record_text(). It can be used by
- # RevisionReader to obtain the text again.
- self.revision_recorder_token = None
-
- def get_first_on_branch_id(self):
- return self.parent_branch_data and self.parent_branch_data.id
-
-
-class _SymbolData:
- """Collection area for information about a symbol in a single CVSFile.
-
- SYMBOL is an instance of Symbol, undifferentiated as a Branch or a
- Tag regardless of whether self is a _BranchData or a _TagData."""
-
- def __init__(self, id, symbol):
- """Initialize an object for SYMBOL."""
-
- # The unique id that will be used for this particular symbol in
- # this particular file. This same id will be used for the CVSItem
- # that is derived from this instance.
- self.id = id
-
- # An instance of Symbol.
- self.symbol = symbol
-
-
-class _BranchData(_SymbolData):
- """Collection area for information about a Branch in a single CVSFile."""
-
- def __init__(self, id, symbol, branch_number):
- _SymbolData.__init__(self, id, symbol)
-
- # The branch number (e.g., '1.5.2') of this branch.
- self.branch_number = branch_number
-
- # The revision number of the revision from which this branch
- # sprouts (e.g., '1.5').
- self.parent = self.branch_number[:self.branch_number.rindex(".")]
-
- # The revision number of the first commit on this branch, if any
- # (e.g., '1.5.2.1'); otherwise, None.
- self.child = None
-
-
-class _TagData(_SymbolData):
- """Collection area for information about a Tag in a single CVSFile."""
-
- def __init__(self, id, symbol, rev):
- _SymbolData.__init__(self, id, symbol)
-
- # The revision number being tagged (e.g., '1.5.2.3').
- self.rev = rev
-
-
-class _SymbolDataCollector(object):
- """Collect information about symbols in a single CVSFile."""
-
- def __init__(self, fdc, cvs_file):
- self.fdc = fdc
- self.cvs_file = cvs_file
-
- self.pdc = self.fdc.pdc
- self.collect_data = self.fdc.collect_data
-
- # A list [(name, revision), ...] of symbols defined in the header
- # of the file. The name has already been transformed using the
- # symbol transform rules. If the symbol transform rules indicate
- # that the symbol should be ignored, then it is never added to
- # this list. This list is processed then deleted in
- # process_symbols().
- self._symbol_defs = []
-
- # A set containing the transformed names of symbols in this file
- # (used to detect duplicats during processing of unlabeled
- # branches):
- self._defined_symbols = set()
-
- # Map { branch_number : _BranchData }, where branch_number has an
- # odd number of digits.
- self.branches_data = { }
-
- # Map { revision : [ tag_data ] }, where revision has an even
- # number of digits, and the value is a list of _TagData objects
- # for tags that apply to that revision.
- self.tags_data = { }
-
- def _add_branch(self, name, branch_number):
- """Record that BRANCH_NUMBER is the branch number for branch NAME,
- and derive and record the revision from which NAME sprouts.
- BRANCH_NUMBER is an RCS branch number with an odd number of
- components, for example '1.7.2' (never '1.7.0.2'). Return the
- _BranchData instance (which is usually newly-created)."""
-
- branch_data = self.branches_data.get(branch_number)
-
- if branch_data is not None:
- Log().warn(
- "%s: in '%s':\n"
- " branch '%s' already has name '%s',\n"
- " cannot also have name '%s', ignoring the latter\n"
- % (warning_prefix,
- self.cvs_file.filename, branch_number,
- branch_data.symbol.name, name)
- )
- return branch_data
-
- symbol = self.pdc.get_symbol(name)
- branch_data = _BranchData(
- self.collect_data.item_key_generator.gen_id(), symbol, branch_number
- )
- self.branches_data[branch_number] = branch_data
- return branch_data
-
- def _construct_distinct_name(self, name, original_name):
- """Construct a distinct symbol name from NAME.
-
- If NAME is distinct, return it. If it is already used in this
- file (as determined from its presence in self._defined_symbols),
- construct and return a new name that is not already used."""
-
- if name not in self._defined_symbols:
- return name
- else:
- index = 1
- while True:
- dup_name = '%s-DUPLICATE-%d' % (name, index,)
- if dup_name not in self._defined_symbols:
- self.collect_data.record_fatal_error(
- "Symbol name '%s' is already used in '%s'.\n"
- "The unlabeled branch '%s' must be renamed using "
- "--symbol-transform."
- % (name, self.cvs_file.filename, original_name,)
- )
- return dup_name
-
- def _add_unlabeled_branch(self, branch_number):
- original_name = "unlabeled-" + branch_number
- name = self.transform_symbol(original_name, branch_number)
- if name is None:
- self.collect_data.record_fatal_error(
- "The unlabeled branch '%s' in '%s' contains commits.\n"
- "It may not be ignored via a symbol transform. (Use --exclude "
- "instead.)"
- % (original_name, self.cvs_file.filename,)
- )
- # Retain the original name to allow the conversion to continue:
- name = original_name
-
- distinct_name = self._construct_distinct_name(name, original_name)
- self._defined_symbols.add(distinct_name)
- return self._add_branch(distinct_name, branch_number)
-
- def _add_tag(self, name, revision):
- """Record that tag NAME refers to the specified REVISION."""
-
- symbol = self.pdc.get_symbol(name)
- tag_data = _TagData(
- self.collect_data.item_key_generator.gen_id(), symbol, revision
- )
- self.tags_data.setdefault(revision, []).append(tag_data)
- return tag_data
-
- def transform_symbol(self, name, revision):
- """Transform a symbol according to the project's symbol transforms.
-
- Transform the symbol with the original name NAME and canonicalized
- revision number REVISION. Return the new symbol name or None if
- the symbol should be ignored entirely.
-
- Log the results of the symbol transform if necessary."""
-
- old_name = name
- # Apply any user-defined symbol transforms to the symbol name:
- name = self.cvs_file.project.transform_symbol(
- self.cvs_file, name, revision
- )
-
- if name is None:
- # Ignore symbol:
- self.pdc.log_symbol_transform(old_name, None)
- Log().verbose(
- " symbol '%s'=%s ignored in %s"
- % (old_name, revision, self.cvs_file.filename,)
- )
- else:
- if name != old_name:
- self.pdc.log_symbol_transform(old_name, name)
- Log().verbose(
- " symbol '%s'=%s transformed to '%s' in %s"
- % (old_name, revision, name, self.cvs_file.filename,)
- )
-
- return name
-
- def define_symbol(self, name, revision):
- """Record a symbol definition for later processing."""
-
- # Canonicalize the revision number:
- revision = _branch_revision_re.sub(r'\1\2', revision)
-
- # Apply any user-defined symbol transforms to the symbol name:
- name = self.transform_symbol(name, revision)
-
- if name is not None:
- # Verify that the revision number is valid:
- if _valid_revision_re.match(revision):
- # The revision number is valid; record it for later processing:
- self._symbol_defs.append( (name, revision) )
- else:
- Log().warn(
- 'In %r:\n'
- ' branch %r references invalid revision %s\n'
- ' and will be ignored.'
- % (self.cvs_file.filename, name, revision,)
- )
-
- def _eliminate_trivial_duplicate_defs(self, symbol_defs):
- """Iterate through SYMBOL_DEFS, Removing identical duplicate definitions.
-
- Duplicate definitions of symbol names have been seen in the wild,
- and they can also happen when --symbol-transform is used. If a
- symbol is defined to the same revision number repeatedly, then
- ignore all but the last definition."""
-
- # Make a copy, since we have to iterate through the definitions
- # twice:
- symbol_defs = list(symbol_defs)
-
- # A map { (name, revision) : [index,...] } of the indexes where
- # symbol definitions name=revision were found:
- known_definitions = {}
- for (i, symbol_def) in enumerate(symbol_defs):
- known_definitions.setdefault(symbol_def, []).append(i)
-
- # A set of the indexes of entries that have to be removed from
- # symbol_defs:
- dup_indexes = set()
- for ((name, revision), indexes) in known_definitions.iteritems():
- if len(indexes) > 1:
- Log().verbose(
- "in %r:\n"
- " symbol %s:%s defined multiple times; ignoring duplicates\n"
- % (self.cvs_file.filename, name, revision,)
- )
- dup_indexes.update(indexes[:-1])
-
- for (i, symbol_def) in enumerate(symbol_defs):
- if i not in dup_indexes:
- yield symbol_def
-
- def _process_duplicate_defs(self, symbol_defs):
- """Iterate through SYMBOL_DEFS, processing duplicate names.
-
- Duplicate definitions of symbol names have been seen in the wild,
- and they can also happen when --symbol-transform is used. If a
- symbol is defined multiple times, then it is a fatal error. This
- method should be called after _eliminate_trivial_duplicate_defs()."""
-
- # Make a copy, since we have to access multiple times:
- symbol_defs = list(symbol_defs)
-
- # A map {name : [index,...]} mapping the names of symbols to a
- # list of their definitions' indexes in symbol_defs:
- known_symbols = {}
- for (i, (name, revision)) in enumerate(symbol_defs):
- known_symbols.setdefault(name, []).append(i)
-
- known_symbols = known_symbols.items()
- known_symbols.sort()
- dup_indexes = set()
- for (name, indexes) in known_symbols:
- if len(indexes) > 1:
- # This symbol was defined multiple times.
- self.collect_data.record_fatal_error(
- "Multiple definitions of the symbol '%s' in '%s': %s" % (
- name, self.cvs_file.filename,
- ' '.join([symbol_defs[i][1] for i in indexes]),
- )
- )
- # Ignore all but the last definition for now, to allow the
- # conversion to proceed:
- dup_indexes.update(indexes[:-1])
-
- for (i, symbol_def) in enumerate(symbol_defs):
- if i not in dup_indexes:
- yield symbol_def
-
- def _process_symbol(self, name, revision):
- """Process a symbol called NAME, which is associated with REVISON.
-
- REVISION is a canonical revision number with zeros removed, for
- example: '1.7', '1.7.2', or '1.1.1' or '1.1.1.1'. NAME is a
- transformed branch or tag name."""
-
- # Add symbol to our records:
- if is_branch_revision_number(revision):
- self._add_branch(name, revision)
- else:
- self._add_tag(name, revision)
-
- def process_symbols(self):
- """Process the symbol definitions from SELF._symbol_defs."""
-
- symbol_defs = self._symbol_defs
- del self._symbol_defs
-
- symbol_defs = self._eliminate_trivial_duplicate_defs(symbol_defs)
- symbol_defs = self._process_duplicate_defs(symbol_defs)
-
- for (name, revision) in symbol_defs:
- self._defined_symbols.add(name)
- self._process_symbol(name, revision)
-
- @staticmethod
- def rev_to_branch_number(revision):
- """Return the branch_number of the branch on which REVISION lies.
-
- REVISION is a branch revision number with an even number of
- components; for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
- The return value is the branch number (for example, '1.7.2').
- Return none iff REVISION is a trunk revision such as '1.2'."""
-
- if is_trunk_revision(revision):
- return None
- return revision[:revision.rindex(".")]
-
- def rev_to_branch_data(self, revision):
- """Return the branch_data of the branch on which REVISION lies.
-
- REVISION must be a branch revision number with an even number of
- components; for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
- Raise KeyError iff REVISION is unknown."""
-
- assert not is_trunk_revision(revision)
-
- return self.branches_data[self.rev_to_branch_number(revision)]
-
- def rev_to_lod(self, revision):
- """Return the line of development on which REVISION lies.
-
- REVISION must be a revision number with an even number of
- components. Raise KeyError iff REVISION is unknown."""
-
- if is_trunk_revision(revision):
- return self.pdc.trunk
- else:
- return self.rev_to_branch_data(revision).symbol
-
-
-class _FileDataCollector(cvs2svn_rcsparse.Sink):
- """Class responsible for collecting RCS data for a particular file.
-
- Any collected data that need to be remembered are stored into the
- referenced CollectData instance."""
-
- def __init__(self, pdc, cvs_file):
- """Create an object that is prepared to receive data for CVS_FILE.
- CVS_FILE is a CVSFile instance. COLLECT_DATA is used to store the
- information collected about the file."""
-
- self.pdc = pdc
- self.cvs_file = cvs_file
-
- self.collect_data = self.pdc.collect_data
- self.project = self.cvs_file.project
-
- # A place to store information about the symbols in this file:
- self.sdc = _SymbolDataCollector(self, self.cvs_file)
-
- # { revision : _RevisionData instance }
- self._rev_data = { }
-
- # Lists [ (parent, child) ] of revision number pairs indicating
- # that revision child depends on revision parent along the main
- # line of development.
- self._primary_dependencies = []
-
- # If set, this is an RCS branch number -- rcsparse calls this the
- # "principal branch", but CVS and RCS refer to it as the "default
- # branch", so that's what we call it, even though the rcsparse API
- # setter method is still 'set_principal_branch'.
- self.default_branch = None
-
- # True iff revision 1.1 of the file appears to have been imported
- # (as opposed to added normally).
- self._file_imported = False
-
- def _get_rev_id(self, revision):
- if revision is None:
- return None
- return self._rev_data[revision].cvs_rev_id
-
- def set_principal_branch(self, branch):
- """This is a callback method declared in Sink."""
-
- if branch.find('.') == -1:
- # This just sets the default branch to trunk. Normally this
- # shouldn't occur, but it has been seen in at least one CVS
- # repository. Just ignore it.
- pass
- else:
- self.default_branch = branch
-
- def set_expansion(self, mode):
- """This is a callback method declared in Sink."""
-
- self.cvs_file.mode = mode
-
- def define_tag(self, name, revision):
- """Remember the symbol name and revision, but don't process them yet.
-
- This is a callback method declared in Sink."""
-
- self.sdc.define_symbol(name, revision)
-
- def admin_completed(self):
- """This is a callback method declared in Sink."""
-
- self.sdc.process_symbols()
-
- def define_revision(self, revision, timestamp, author, state,
- branches, next):
- """Record the definition of REVISION.
-
- This is a callback method declared in Sink.
-
- TIMESTAMP is converted with int(); AUTHOR and STATE are stored as
- given. Each entry of BRANCHES is recorded as the first revision
- (the child) of the branch on which it lies. NEXT is the RCS
- 'next' revision number, if any (see the comment below).
-
- Raise RuntimeError if REVISION has already been defined."""
-
- for branch in branches:
- try:
- branch_data = self.sdc.rev_to_branch_data(branch)
- except KeyError:
- # Normally we learn about the branches from the branch names
- # and numbers parsed from the symbolic name header. But this
- # must have been an unlabeled branch that slipped through the
- # net. Generate a name for it and create a _BranchData record
- # for it now.
- branch_data = self.sdc._add_unlabeled_branch(
- self.sdc.rev_to_branch_number(branch))
-
- # A branch can have only one first revision:
- assert branch_data.child is None
- branch_data.child = branch
-
- if revision in self._rev_data:
- # This revision has already been seen.
- Log().error('File %r contains duplicate definitions of revision %s.'
- % (self.cvs_file.filename, revision,))
- raise RuntimeError
-
- # Record basic information about the revision:
- rev_data = _RevisionData(
- self.collect_data.item_key_generator.gen_id(),
- revision, int(timestamp), author, state)
- self._rev_data[revision] = rev_data
-
- # When on trunk, the RCS 'next' revision number points to what
- # humans might consider to be the 'previous' revision number. For
- # example, 1.3's RCS 'next' is 1.2.
- #
- # However, on a branch, the RCS 'next' revision number really does
- # point to what humans would consider to be the 'next' revision
- # number. For example, 1.1.2.1's RCS 'next' would be 1.1.2.2.
- #
- # In other words, in RCS, 'next' always means "where to find the
- # next deltatext that you need this revision to retrieve".
- #
- # That said, we don't *want* RCS's behavior here, so we determine
- # whether we're on trunk or a branch and set the dependencies
- # accordingly. The pairs are stored as (parent, child).
- if next:
- if is_trunk_revision(revision):
- self._primary_dependencies.append( (next, revision,) )
- else:
- self._primary_dependencies.append( (revision, next,) )
-
- def _resolve_primary_dependencies(self):
- """Resolve the dependencies listed in self._primary_dependencies."""
-
- for (parent, child,) in self._primary_dependencies:
- parent_data = self._rev_data[parent]
- assert parent_data.child is None
- parent_data.child = child
-
- child_data = self._rev_data[child]
- assert child_data.parent is None
- child_data.parent = parent
-
- def _resolve_branch_dependencies(self):
- """Resolve dependencies involving branches.
-
- Every branch whose parent revision is known is appended to that
- revision's branches_data. A branch whose parent revision is not
- in self._rev_data is removed from self.sdc.branches_data after a
- warning."""
-
- # NOTE(review): entries are deleted from branches_data inside this
- # loop; that relies on values() returning a list (as it does under
- # Python 2) rather than a live view.
- for branch_data in self.sdc.branches_data.values():
- # The branch_data's parent has the branch as a child regardless
- # of whether the branch had any subsequent commits:
- try:
- parent_data = self._rev_data[branch_data.parent]
- except KeyError:
- Log().warn(
- 'In %r:\n'
- ' branch %r references non-existing revision %s\n'
- ' and will be ignored.'
- % (self.cvs_file.filename, branch_data.symbol.name,
- branch_data.parent,))
- del self.sdc.branches_data[branch_data.branch_number]
- else:
- parent_data.branches_data.append(branch_data)
-
- # If the branch has a child (i.e., something was committed on
- # the branch), then we store a reference to the branch_data
- # there, define the child's parent to be the branch's parent,
- # and list the child in the branch parent's branches_revs_data:
- if branch_data.child is not None:
- child_data = self._rev_data[branch_data.child]
- assert child_data.parent_branch_data is None
- child_data.parent_branch_data = branch_data
- assert child_data.parent is None
- child_data.parent = branch_data.parent
- parent_data.branches_revs_data.append(branch_data.child)
-
- def _sort_branches(self):
- """Sort the branches sprouting from each revision in creation order.
-
- Creation order is taken to be the reverse of the order that they
- are listed in the symbols part of the RCS file. (If a branch is
- created then deleted, a later branch can be assigned the recycled
- branch number; therefore branch numbers are not an indication of
- creation order.)"""
-
- for rev_data in self._rev_data.values():
- rev_data.branches_data.sort(lambda a, b: - cmp(a.id, b.id))
-
- def _resolve_tag_dependencies(self):
- """Resolve dependencies involving tags."""
-
- for (rev, tag_data_list) in self.sdc.tags_data.items():
- try:
- parent_data = self._rev_data[rev]
- except KeyError:
- Log().warn(
- 'In %r:\n'
- ' the following tag(s) reference non-existing revision %s\n'
- ' and will be ignored:\n'
- ' %s' % (
- self.cvs_file.filename, rev,
- ', '.join([repr(tag_data.symbol.name)
- for tag_data in tag_data_list]),))
- del self.sdc.tags_data[rev]
- else:
- for tag_data in tag_data_list:
- assert tag_data.rev == rev
- # The tag_data's rev has the tag as a child:
- parent_data.tags_data.append(tag_data)
-
- def _determine_operation(self, rev_data):
- prev_rev_data = self._rev_data.get(rev_data.parent)
- return cvs_revision_type_map[(
- rev_data.state != 'dead',
- prev_rev_data is not None and prev_rev_data.state != 'dead',
- )]
-
- def _get_cvs_revision(self, rev_data):
- """Create and return a CVSRevision for REV_DATA."""
-
- branch_ids = [
- branch_data.id
- for branch_data in rev_data.branches_data
- ]
-
- branch_commit_ids = [
- self._get_rev_id(rev)
- for rev in rev_data.branches_revs_data
- ]
-
- tag_ids = [
- tag_data.id
- for tag_data in rev_data.tags_data
- ]
-
- revision_type = self._determine_operation(rev_data)
-
- return revision_type(
- self._get_rev_id(rev_data.rev), self.cvs_file,
- rev_data.timestamp, None,
- self._get_rev_id(rev_data.parent),
- self._get_rev_id(rev_data.child),
- rev_data.rev,
- True,
- self.sdc.rev_to_lod(rev_data.rev),
- rev_data.get_first_on_branch_id(),
- False, None, None,
- tag_ids, branch_ids, branch_commit_ids,
- rev_data.revision_recorder_token)
-
- def _get_cvs_revisions(self):
- """Generate the CVSRevisions present in this file."""
-
- for rev_data in self._rev_data.itervalues():
- yield self._get_cvs_revision(rev_data)
-
- def _get_cvs_branches(self):
- """Generate the CVSBranches present in this file."""
-
- for branch_data in self.sdc.branches_data.values():
- yield CVSBranch(
- branch_data.id, self.cvs_file, branch_data.symbol,
- branch_data.branch_number,
- self.sdc.rev_to_lod(branch_data.parent),
- self._get_rev_id(branch_data.parent),
- self._get_rev_id(branch_data.child),
- None,
- )
-
- def _get_cvs_tags(self):
- """Generate the CVSTags present in this file."""
-
- for tags_data in self.sdc.tags_data.values():
- for tag_data in tags_data:
- yield CVSTag(
- tag_data.id, self.cvs_file, tag_data.symbol,
- self.sdc.rev_to_lod(tag_data.rev),
- self._get_rev_id(tag_data.rev),
- None,
- )
-
- def tree_completed(self):
- """The revision tree has been parsed.
-
- Analyze it for consistency and connect some loose ends.
-
- This is a callback method declared in Sink."""
-
- self._resolve_primary_dependencies()
- self._resolve_branch_dependencies()
- self._sort_branches()
- self._resolve_tag_dependencies()
-
- # Compute the preliminary CVSFileItems for this file:
- cvs_items = []
- cvs_items.extend(self._get_cvs_revisions())
- cvs_items.extend(self._get_cvs_branches())
- cvs_items.extend(self._get_cvs_tags())
- self._cvs_file_items = CVSFileItems(
- self.cvs_file, self.pdc.trunk, cvs_items
- )
-
- self._cvs_file_items.check_link_consistency()
-
- # Tell the revision recorder about the file dependency tree.
- self.collect_data.revision_recorder.start_file(self._cvs_file_items)
-
- def set_revision_info(self, revision, log, text):
- """Record the log message and text information for REVISION.
-
- This is a callback method declared in Sink.
-
- The metadata id, the deltatext_exists flag and the revision
- recorder token of the corresponding CVSRevision are set. If the
- CVSRevision already has a metadata id, a warning is logged and the
- call is otherwise ignored."""
-
- rev_data = self._rev_data[revision]
- cvs_rev = self._cvs_file_items[rev_data.cvs_rev_id]
-
- if cvs_rev.metadata_id is not None:
- # Users have reported problems with repositories in which the
- # deltatext block for revision 1.1 appears twice. It is not
- # known whether this results from a CVS/RCS bug, or from botched
- # hand-editing of the repository. In any case, empirically, cvs
- # and rcs both use the first version when checking out data, so
- # that's what we will do. (For the record: "cvs log" fails on
- # such a file; "rlog" prints the log message from the first
- # block and ignores the second one.)
- Log().warn(
- "%s: in '%s':\n"
- " Deltatext block for revision %s appeared twice;\n"
- " ignoring the second occurrence.\n"
- % (warning_prefix, self.cvs_file.filename, revision,)
- )
- return
-
- # Trunk revisions are stored with a branch name of None:
- if is_trunk_revision(revision):
- branch_name = None
- else:
- branch_name = self.sdc.rev_to_branch_data(revision).symbol.name
-
- cvs_rev.metadata_id = self.collect_data.metadata_logger.store(
- self.project, branch_name, rev_data.author, log
- )
- cvs_rev.deltatext_exists = bool(text)
-
- # If this is revision 1.1, determine whether the file appears to
- # have been created via 'cvs import' rather than 'cvs add'. The
- # test is that the log message CVS uses for 1.1 in imports is
- # "Initial revision\n" with no period. (This fact helps determine
- # whether this file might have had a default branch in the past.)
- if revision == '1.1':
- self._file_imported = (log == 'Initial revision\n')
-
- cvs_rev.revision_recorder_token = \
- self.collect_data.revision_recorder.record_text(cvs_rev, log, text)
-
- def parse_completed(self):
- """Finish the processing of this file.
-
- This is a callback method declared in Sink."""
-
- # Make sure that there was an info section for each revision:
- for cvs_item in self._cvs_file_items.values():
- if isinstance(cvs_item, CVSRevision) and cvs_item.metadata_id is None:
- self.collect_data.record_fatal_error(
- '%r has no deltatext section for revision %s'
- % (self.cvs_file.filename, cvs_item.rev,)
- )
-
- def _process_ntdbrs(self):
- """Fix up any non-trunk default branch revisions (if present).
-
- This method does not return a value. The adjustments themselves
- are made on self._cvs_file_items, through its process_live_ntdb(),
- process_historical_ntdb() and imported_remove_1_1() methods. A
- VendorBranchError raised along the way is recorded as a fatal
- error.
-
- There are two cases to handle:
-
- One case is simple. The RCS file lists a default branch
- explicitly in its header, such as '1.1.1'. In this case, we know
- that every revision on the vendor branch is to be treated as head
- of trunk at that point in time.
-
- But there's also a degenerate case. The RCS file does not
- currently have a default branch, yet we can deduce that for some
- period in the past it probably *did* have one. For example, the
- file has vendor revisions 1.1.1.1 -> 1.1.1.96, all of which are
- dated before 1.2, and then it has 1.1.1.97 -> 1.1.1.100 dated
- after 1.2. In this case, we should record 1.1.1.96 as the last
- vendor revision to have been the head of the default branch.
-
- If any non-trunk default branch revisions are found:
-
- - Set their ntdbr members to True.
-
- - Connect the last one with revision 1.2.
-
- - Remove revision 1.1 if it is not needed.
-
- """
-
- try:
- if self.default_branch:
- # The simple case: the header names a default branch.
- vendor_cvs_branch_id = self.sdc.branches_data[self.default_branch].id
- vendor_lod_items = self._cvs_file_items.get_lod_items(
- self._cvs_file_items[vendor_cvs_branch_id]
- )
- if not self._cvs_file_items.process_live_ntdb(vendor_lod_items):
- return
- elif self._file_imported:
- # The degenerate case: no default branch now, but the file was
- # imported, so look at branch '1.1.1' (if there is one).
- vendor_branch_data = self.sdc.branches_data.get('1.1.1')
- if vendor_branch_data is None:
- return
- else:
- vendor_lod_items = self._cvs_file_items.get_lod_items(
- self._cvs_file_items[vendor_branch_data.id]
- )
- if not self._cvs_file_items.process_historical_ntdb(
- vendor_lod_items
- ):
- return
- else:
- return
- except VendorBranchError, e:
- # Record the problem and leave the file's items as they are:
- self.collect_data.record_fatal_error(str(e))
- return
-
- if self._file_imported:
- self._cvs_file_items.imported_remove_1_1(vendor_lod_items)
-
- self._cvs_file_items.check_link_consistency()
-
- def get_cvs_file_items(self):
- """Finish up and return a CVSFileItems instance for this file.
-
- This method must only be called once."""
-
- self._process_ntdbrs()
-
- # Break a circular reference loop, allowing the memory for self
- # and sdc to be freed.
- del self.sdc
-
- return self._cvs_file_items
-
-
-class _ProjectDataCollector:
- def __init__(self, collect_data, project):
- self.collect_data = collect_data
- self.project = project
- self.num_files = 0
-
- # The Trunk LineOfDevelopment object for this project:
- self.trunk = Trunk(
- self.collect_data.symbol_key_generator.gen_id(), self.project
- )
- self.project.trunk_id = self.trunk.id
-
- # This causes a record for self.trunk to spring into existence:
- self.collect_data.symbol_stats[self.trunk]
-
- # A map { name -> Symbol } for all known symbols in this project.
- # The symbols listed here are undifferentiated into Branches and
- # Tags because the same name might appear as a branch in one file
- # and a tag in another.
- self.symbols = {}
-
- # A map { (old_name, new_name) : count } indicating how many files
- # were affected by each each symbol name transformation:
- self.symbol_transform_counts = {}
-
- def get_symbol(self, name):
- """Return the Symbol object for the symbol named NAME in this project.
-
- If such a symbol does not yet exist, allocate a new symbol_id,
- create a Symbol instance, store it in self.symbols, and return it."""
-
- symbol = self.symbols.get(name)
- if symbol is None:
- symbol = Symbol(
- self.collect_data.symbol_key_generator.gen_id(),
- self.project, name)
- self.symbols[name] = symbol
- return symbol
-
- def log_symbol_transform(self, old_name, new_name):
- """Record that OLD_NAME was transformed to NEW_NAME in one file.
-
- This information is used to generated a statistical summary of
- symbol transforms."""
-
- try:
- self.symbol_transform_counts[old_name, new_name] += 1
- except KeyError:
- self.symbol_transform_counts[old_name, new_name] = 1
-
- def summarize_symbol_transforms(self):
- if self.symbol_transform_counts and Log().is_on(Log.NORMAL):
- log = Log()
- log.normal('Summary of symbol transforms:')
- transforms = self.symbol_transform_counts.items()
- transforms.sort()
- for ((old_name, new_name), count) in transforms:
- if new_name is None:
- log.normal(' "%s" ignored in %d files' % (old_name, count,))
- else:
- log.normal(
- ' "%s" transformed to "%s" in %d files'
- % (old_name, new_name, count,)
- )
-
- def _process_cvs_file_items(self, cvs_file_items):
- """Process the CVSFileItems from one CVSFile."""
-
- # Remove CVSRevisionDeletes that are not needed:
- cvs_file_items.remove_unneeded_deletes(self.collect_data.metadata_db)
-
- # Remove initial branch deletes that are not needed:
- cvs_file_items.remove_initial_branch_deletes(
- self.collect_data.metadata_db
- )
-
- # If this is a --trunk-only conversion, discard all branches and
- # tags, then draft any non-trunk default branch revisions to
- # trunk:
- if Ctx().trunk_only:
- cvs_file_items.exclude_non_trunk()
-
- self.collect_data.revision_recorder.finish_file(cvs_file_items)
- self.collect_data.add_cvs_file_items(cvs_file_items)
- self.collect_data.symbol_stats.register(cvs_file_items)
-
- def process_file(self, cvs_file):
- Log().normal(cvs_file.filename)
- fdc = _FileDataCollector(self, cvs_file)
- try:
- cvs2svn_rcsparse.parse(open(cvs_file.filename, 'rb'), fdc)
- except (cvs2svn_rcsparse.common.RCSParseError, ValueError, RuntimeError):
- self.collect_data.record_fatal_error(
- "%r is not a valid ,v file" % (cvs_file.filename,)
- )
- # Abort the processing of this file, but let the pass continue
- # with other files:
- return
- except:
- Log().warn("Exception occurred while parsing %s" % cvs_file.filename)
- raise
- else:
- self.num_files += 1
-
- cvs_file_items = fdc.get_cvs_file_items()
-
- del fdc
-
- self._process_cvs_file_items(cvs_file_items)
-
-
-class CollectData:
- """Repository for data collected by parsing the CVS repository files.
-
- This class manages the databases into which information collected
- from the CVS repository is stored. The data are stored into this
- class by _FileDataCollector instances, one of which is created for
- each file to be parsed."""
-
- def __init__(self, revision_recorder, stats_keeper):
- self.revision_recorder = revision_recorder
- self._cvs_item_store = NewCVSItemStore(
- artifact_manager.get_temp_file(config.CVS_ITEMS_STORE))
- self.metadata_db = MetadataDatabase(
- artifact_manager.get_temp_file(config.METADATA_STORE),
- artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE),
- DB_OPEN_NEW,
- )
- self.metadata_logger = MetadataLogger(self.metadata_db)
- self.fatal_errors = []
- self.num_files = 0
- self.symbol_stats = SymbolStatisticsCollector()
- self.stats_keeper = stats_keeper
-
- # Key generator for CVSFiles:
- self.file_key_generator = KeyGenerator()
-
- # Key generator for CVSItems:
- self.item_key_generator = KeyGenerator()
-
- # Key generator for Symbols:
- self.symbol_key_generator = KeyGenerator()
-
- self.revision_recorder.start()
-
- def record_fatal_error(self, err):
- """Record that fatal error ERR was found.
-
- ERR is a string (without trailing newline) describing the error.
- Output the error to stderr immediately, and record a copy to be
- output again in a summary at the end of CollectRevsPass."""
-
- err = '%s: %s' % (error_prefix, err,)
- Log().error(err + '\n')
- self.fatal_errors.append(err)
-
- def add_cvs_directory(self, cvs_directory):
- """Record CVS_DIRECTORY."""
-
- Ctx()._cvs_file_db.log_file(cvs_directory)
-
- def add_cvs_file_items(self, cvs_file_items):
- """Record the information from CVS_FILE_ITEMS.
-
- Store the CVSFile to _cvs_file_db under its persistent id, store
- the CVSItems, and record the CVSItems to self.stats_keeper."""
-
- Ctx()._cvs_file_db.log_file(cvs_file_items.cvs_file)
- self._cvs_item_store.add(cvs_file_items)
-
- self.stats_keeper.record_cvs_file(cvs_file_items.cvs_file)
- for cvs_item in cvs_file_items.values():
- self.stats_keeper.record_cvs_item(cvs_item)
-
- def _get_cvs_file(
- self, parent_directory, basename, file_in_attic, leave_in_attic=False
- ):
- """Return a CVSFile describing the file with name BASENAME.
-
- PARENT_DIRECTORY is the CVSDirectory instance describing the
- directory that physically holds this file in the filesystem.
- BASENAME must be the base name of a *,v file within
- PARENT_DIRECTORY.
-
- FILE_IN_ATTIC is a boolean telling whether the specified file is
- in an Attic subdirectory. If FILE_IN_ATTIC is True, then:
-
- - If LEAVE_IN_ATTIC is True, then leave the 'Attic' component in
- the filename.
-
- - Otherwise, raise FileInAndOutOfAtticException if a file with the
- same filename appears outside of Attic.
-
- The CVSFile is assigned a new unique id. All of the CVSFile
- information is filled in except mode (which can only be determined
- by parsing the file).
-
- Raise FatalError if the resulting filename would not be legal in
- SVN."""
-
- filename = os.path.join(parent_directory.filename, basename)
- try:
- verify_svn_filename_legal(basename[:-2])
- except IllegalSVNPathError, e:
- raise FatalError(
- 'File %r would result in an illegal SVN filename: %s'
- % (filename, e,)
- )
-
- if file_in_attic and not leave_in_attic:
- in_attic = True
- logical_parent_directory = parent_directory.parent_directory
-
- # If this file also exists outside of the attic, it's a fatal
- # error:
- non_attic_filename = os.path.join(
- logical_parent_directory.filename, basename,
- )
- if os.path.exists(non_attic_filename):
- raise FileInAndOutOfAtticException(non_attic_filename, filename)
- else:
- in_attic = False
- logical_parent_directory = parent_directory
-
- file_stat = os.stat(filename)
-
- # The size of the file in bytes:
- file_size = file_stat[stat.ST_SIZE]
-
- # Whether or not the executable bit is set:
- file_executable = bool(file_stat[0] & stat.S_IXUSR)
-
- # mode is not known, so we temporarily set it to None.
- return CVSFile(
- self.file_key_generator.gen_id(),
- parent_directory.project, logical_parent_directory, basename[:-2],
- in_attic, file_executable, file_size, None
- )
-
- def _get_attic_file(self, parent_directory, basename):
- """Return a CVSFile object for the Attic file at BASENAME.
-
- PARENT_DIRECTORY is the CVSDirectory that physically contains the
- file on the filesystem (i.e., the Attic directory). It is not
- necessarily the parent_directory of the CVSFile that will be
- returned.
-
- Return CVSFile, whose parent directory is usually
- PARENT_DIRECTORY.parent_directory, but might be PARENT_DIRECTORY
- iff CVSFile will remain in the Attic directory."""
-
- try:
- return self._get_cvs_file(parent_directory, basename, True)
- except FileInAndOutOfAtticException, e:
- if Ctx().retain_conflicting_attic_files:
- Log().warn(
- "%s: %s;\n"
- " storing the latter into 'Attic' subdirectory.\n"
- % (warning_prefix, e)
- )
- else:
- self.record_fatal_error(str(e))
-
- # Either way, return a CVSFile object so that the rest of the
- # file processing can proceed:
- return self._get_cvs_file(
- parent_directory, basename, True, leave_in_attic=True
- )
-
- def _generate_attic_cvs_files(self, cvs_directory):
- """Generate CVSFiles for the files in Attic directory CVS_DIRECTORY.
-
- Also add CVS_DIRECTORY to self if any files are being retained in
- that directory."""
-
- retained_attic_file = False
-
- fnames = os.listdir(cvs_directory.filename)
- fnames.sort()
- for fname in fnames:
- pathname = os.path.join(cvs_directory.filename, fname)
- if os.path.isdir(pathname):
- Log().warn("Directory %s found within Attic; ignoring" % (pathname,))
- elif fname.endswith(',v'):
- cvs_file = self._get_attic_file(cvs_directory, fname)
- if cvs_file.parent_directory == cvs_directory:
- # This file will be retained in the Attic directory.
- retained_attic_file = True
- yield cvs_file
-
- if retained_attic_file:
- # If any files were retained in the Attic directory, then write
- # the Attic directory to CVSFileDatabase:
- self.add_cvs_directory(cvs_directory)
-
- def _get_non_attic_file(self, parent_directory, basename):
- """Return a CVSFile object for the non-Attic file at BASENAME."""
-
- return self._get_cvs_file(parent_directory, basename, False)
-
- def _generate_cvs_files(self, cvs_directory):
- """Generate the CVSFiles under non-Attic directory CVS_DIRECTORY.
-
- Process directories recursively, including Attic directories.
- Also create and register CVSDirectories as they are found, and
- look for conflicts between the filenames that will result from
- files, attic files, and subdirectories."""
-
- self.add_cvs_directory(cvs_directory)
-
- # Map {cvs_file.basename : cvs_file.filename} for files directly
- # in cvs_directory:
- rcsfiles = {}
-
- attic_dir = None
-
- # Non-Attic subdirectories of cvs_directory (to be recursed into):
- dirs = []
-
- fnames = os.listdir(cvs_directory.filename)
- fnames.sort()
- for fname in fnames:
- pathname = os.path.join(cvs_directory.filename, fname)
- if os.path.isdir(pathname):
- if fname == 'Attic':
- attic_dir = fname
- else:
- dirs.append(fname)
- elif fname.endswith(',v'):
- cvs_file = self._get_non_attic_file(cvs_directory, fname)
- rcsfiles[cvs_file.basename] = cvs_file.filename
- yield cvs_file
- else:
- # Silently ignore other files:
- pass
-
- # Map {cvs_file.basename : cvs_file.filename} for files in an
- # Attic directory within cvs_directory:
- attic_rcsfiles = {}
-
- if attic_dir is not None:
- attic_directory = CVSDirectory(
- self.file_key_generator.gen_id(),
- cvs_directory.project, cvs_directory, 'Attic',
- )
-
- for cvs_file in self._generate_attic_cvs_files(attic_directory):
- if cvs_file.parent_directory == cvs_directory:
- attic_rcsfiles[cvs_file.basename] = cvs_file.filename
- yield cvs_file
-
- alldirs = dirs + [attic_dir]
- else:
- alldirs = dirs
-
- # Check for conflicts between directory names and the filenames
- # that will result from the rcs files (both in this directory and
- # in attic). (We recurse into the subdirectories nevertheless, to
- # try to detect more problems.)
- for fname in alldirs:
- pathname = os.path.join(cvs_directory.filename, fname)
- for rcsfile_list in [rcsfiles, attic_rcsfiles]:
- if fname in rcsfile_list:
- self.record_fatal_error(
- 'Directory name conflicts with filename. Please remove or '
- 'rename one\n'
- 'of the following:\n'
- ' "%s"\n'
- ' "%s"'
- % (pathname, rcsfile_list[fname],)
- )
-
- # Now recurse into the other subdirectories:
- for fname in dirs:
- dirname = os.path.join(cvs_directory.filename, fname)
-
- # Verify that the directory name does not contain any illegal
- # characters:
- try:
- verify_svn_filename_legal(fname)
- except IllegalSVNPathError, e:
- raise FatalError(
- 'Directory %r would result in an illegal SVN path name: %s'
- % (dirname, e,)
- )
-
- sub_directory = CVSDirectory(
- self.file_key_generator.gen_id(),
- cvs_directory.project, cvs_directory, fname,
- )
-
- for cvs_file in self._generate_cvs_files(sub_directory):
- yield cvs_file
-
- def process_project(self, project):
- Ctx()._projects[project.id] = project
-
- root_cvs_directory = CVSDirectory(
- self.file_key_generator.gen_id(), project, None, ''
- )
- project.root_cvs_directory_id = root_cvs_directory.id
- pdc = _ProjectDataCollector(self, project)
-
- found_rcs_file = False
- for cvs_file in self._generate_cvs_files(root_cvs_directory):
- pdc.process_file(cvs_file)
- found_rcs_file = True
-
- if not found_rcs_file:
- self.record_fatal_error(
- 'No RCS files found under %r!\n'
- 'Are you absolutely certain you are pointing cvs2svn\n'
- 'at a CVS repository?\n'
- % (project.project_cvs_repos_path,)
- )
-
- pdc.summarize_symbol_transforms()
-
- self.num_files += pdc.num_files
- Log().verbose('Processed', self.num_files, 'files')
-
- def _set_cvs_path_ordinals(self):
- cvs_files = list(Ctx()._cvs_file_db.itervalues())
- cvs_files.sort(CVSPath.slow_compare)
- for (i, cvs_file) in enumerate(cvs_files):
- cvs_file.ordinal = i
-
- def close(self):
- """Close the data structures associated with this instance.
-
- Return a list of fatal errors encountered while processing input.
- Each list entry is a string describing one fatal error."""
-
- self.revision_recorder.finish()
- self.symbol_stats.purge_ghost_symbols()
- self.symbol_stats.close()
- self.symbol_stats = None
- self.metadata_logger = None
- self.metadata_db.close()
- self.metadata_db = None
- self._cvs_item_store.close()
- self._cvs_item_store = None
- self._set_cvs_path_ordinals()
- self.revision_recorder = None
- retval = self.fatal_errors
- self.fatal_errors = None
- return retval
-
-
diff --git a/cvs2svn_lib/common.py b/cvs2svn_lib/common.py
deleted file mode 100644
index 8400907..0000000
--- a/cvs2svn_lib/common.py
+++ /dev/null
@@ -1,409 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains common facilities used by cvs2svn."""
-
-
-import re
-import time
-import codecs
-
-from cvs2svn_lib.log import Log
-
-
-# Always use these constants for opening databases.
-DB_OPEN_READ = 'r'
-DB_OPEN_WRITE = 'w'
-DB_OPEN_NEW = 'n'
-
-
-SVN_INVALID_REVNUM = -1
-
-
-# Warnings and errors start with these strings. They are typically
-# followed by a colon and a space, as in "%s: " ==> "WARNING: ".
-warning_prefix = "WARNING"
-error_prefix = "ERROR"
-
-
-class FatalException(Exception):
- """Exception thrown on a non-recoverable error.
-
- If this exception is thrown by main(), it is caught by the global
- layer of the program, its string representation is printed (followed
- by a newline), and the program is ended with an exit code of 1."""
-
- pass
-
-
-class InternalError(Exception):
- """Exception thrown in the case of a cvs2svn internal error (aka, bug)."""
-
- pass
-
-
-class FatalError(FatalException):
- """A FatalException that prepends error_prefix to the message."""
-
- def __init__(self, msg):
- """Use (error_prefix + ': ' + MSG) as the error message."""
-
- FatalException.__init__(self, '%s: %s' % (error_prefix, msg,))
-
-
-class CommandError(FatalError):
- """A FatalError caused by a failed command invocation.
-
- The error message includes the command name, exit code, and output."""
-
- def __init__(self, command, exit_status, error_output=''):
- self.command = command
- self.exit_status = exit_status
- self.error_output = error_output
- if error_output.rstrip():
- FatalError.__init__(
- self,
- 'The command %r failed with exit status=%s\n'
- 'and the following output:\n'
- '%s'
- % (self.command, self.exit_status, self.error_output.rstrip()))
- else:
- FatalError.__init__(
- self,
- 'The command %r failed with exit status=%s and no output'
- % (self.command, self.exit_status))
-
-
-def path_join(*components):
- """Join two or more pathname COMPONENTS, inserting '/' as needed.
- Empty component are skipped."""
-
- return '/'.join(filter(None, components))
-
-
-def path_split(path):
- """Split the svn pathname PATH into a pair, (HEAD, TAIL).
-
- This is similar to os.path.split(), but always uses '/' as path
- separator. PATH is an svn path, which should not start with a '/'.
- HEAD is everything before the last slash, and TAIL is everything
- after. If PATH ends in a slash, TAIL will be empty. If there is no
- slash in PATH, HEAD will be empty. If PATH is empty, both HEAD and
- TAIL are empty."""
-
- pos = path.rfind('/')
- if pos == -1:
- return ('', path,)
- else:
- return (path[:pos], path[pos+1:],)
-
-
-class IllegalSVNPathError(FatalException):
- pass
-
-
-# Control characters (characters not allowed in Subversion filenames):
-ctrl_characters_regexp = re.compile('[\\\x00-\\\x1f\\\x7f]')
-
-
-def verify_svn_filename_legal(filename):
- """Verify that FILENAME is a legal filename.
-
- FILENAME is a path component of a CVS path. Check that it won't
- choke SVN:
-
- - Check that it is not empty.
-
- - Check that it is not equal to '.' or '..'.
-
- - Check that the filename does not include any control characters.
-
- If any of these tests fail, raise an IllegalSVNPathError."""
-
- if filename == '':
- raise IllegalSVNPathError("Empty filename component.")
-
- if filename in ['.', '..']:
- raise IllegalSVNPathError("Illegal filename component %r." % (filename,))
-
- m = ctrl_characters_regexp.search(filename)
- if m:
- raise IllegalSVNPathError(
- "Character %r in filename %r is not supported by Subversion."
- % (m.group(), filename,)
- )
-
-
-def verify_svn_path_legal(path):
- """Verify that PATH is a legitimate SVN path.
-
- If not, raise an IllegalSVNPathError."""
-
- if path.startswith('/'):
- raise IllegalSVNPathError("Path %r must not start with '/'." % (path,))
- head = path
- while head != '':
- (head,tail) = path_split(head)
- try:
- verify_svn_filename_legal(tail)
- except IllegalSVNPathError, e:
- raise IllegalSVNPathError('Problem with path %r: %s' % (path, e,))
-
-
-def normalize_svn_path(path, allow_empty=False):
- """Normalize an SVN path (e.g., one supplied by a user).
-
- 1. Strip leading, trailing, and duplicated '/'.
- 2. If ALLOW_EMPTY is not set, verify that PATH is not empty.
-
- Return the normalized path.
-
- If the path is invalid, raise an IllegalSVNPathError."""
-
- norm_path = path_join(*path.split('/'))
- if not allow_empty and not norm_path:
- raise IllegalSVNPathError("Path is empty")
- return norm_path
-
-
-class PathRepeatedException(Exception):
- def __init__(self, path, count):
- self.path = path
- self.count = count
- Exception.__init__(
- self, 'Path %s is repeated %d times' % (self.path, self.count,)
- )
-
-
-class PathsNestedException(Exception):
- def __init__(self, nest, nestlings):
- self.nest = nest
- self.nestlings = nestlings
- Exception.__init__(
- self,
- 'Path %s contains the following other paths: %s'
- % (self.nest, ', '.join(self.nestlings),)
- )
-
-
-class PathsNotDisjointException(FatalException):
- """An exception that collects multiple other disjointness exceptions."""
-
- def __init__(self, problems):
- self.problems = problems
- Exception.__init__(
- self,
- 'The following paths are not disjoint:\n'
- ' %s\n'
- % ('\n '.join([str(problem) for problem in self.problems]),)
- )
-
-
-def verify_paths_disjoint(*paths):
- """Verify that all of the paths in the argument list are disjoint.
-
- If any of the paths is nested in another one (i.e., in the sense
- that 'a/b/c/d' is nested in 'a/b'), or any two paths are identical,
- raise a PathsNotDisjointException containing exceptions detailing
- the individual problems."""
-
- def split(path):
- if not path:
- return []
- else:
- return path.split('/')
-
- def contains(split_path1, split_path2):
- """Return True iff SPLIT_PATH1 contains SPLIT_PATH2."""
-
- return (
- len(split_path1) < len(split_path2)
- and split_path2[:len(split_path1)] == split_path1
- )
-
- paths = [(split(path), path) for path in paths]
- # If all overlapping elements are equal, a shorter list is
- # considered "less than" a longer one. Therefore if any paths are
- # nested, this sort will leave at least one such pair adjacent, in
- # the order [nest,nestling].
- paths.sort()
-
- problems = []
-
- # Create exceptions for any repeated paths, and delete the repeats
- # from the paths array:
- i = 0
- while i < len(paths):
- split_path, path = paths[i]
- j = i + 1
- while j < len(paths) and split_path == paths[j][0]:
- j += 1
- if j - i > 1:
- problems.append(PathRepeatedException(path, j - i))
- # Delete all but the first copy:
- del paths[i + 1:j]
- i += 1
-
- # Create exceptions for paths nested in each other:
- i = 0
- while i < len(paths):
- split_path, path = paths[i]
- j = i + 1
- while j < len(paths) and contains(split_path, paths[j][0]):
- j += 1
- if j - i > 1:
- problems.append(PathsNestedException(
- path, [path2 for (split_path2, path2) in paths[i + 1:j]]
- ))
- i += 1
-
- if problems:
- raise PathsNotDisjointException(problems)
-
-
-def format_date(date):
- """Return an svn-compatible date string for DATE (seconds since epoch).
-
- A Subversion date looks like '2002-09-29T14:44:59.000000Z'."""
-
- return time.strftime("%Y-%m-%dT%H:%M:%S.000000Z", time.gmtime(date))
-
-
-class CVSTextDecoder:
- """Callable that decodes CVS strings into Unicode."""
-
- def __init__(self, encodings, fallback_encoding=None):
- """Create a CVSTextDecoder instance.
-
- ENCODINGS is a list containing the names of encodings that are
- attempted to be used as source encodings in 'strict' mode.
-
- FALLBACK_ENCODING, if specified, is the name of an encoding that
- should be used as a source encoding in lossy 'replace' mode if all
- of ENCODINGS failed.
-
- Raise LookupError if any of the specified encodings is unknown."""
-
- self.decoders = [
- (encoding, codecs.lookup(encoding)[1])
- for encoding in encodings]
-
- if fallback_encoding is None:
- self.fallback_decoder = None
- else:
- self.fallback_decoder = (
- fallback_encoding, codecs.lookup(fallback_encoding)[1]
- )
-
- def add_encoding(self, encoding):
- """Add an encoding to be tried in 'strict' mode.
-
- ENCODING is the name of an encoding. If it is unknown, raise a
- LookupError."""
-
- for (name, decoder) in self.decoders:
- if name == encoding:
- return
- else:
- self.decoders.append( (encoding, codecs.lookup(encoding)[1]) )
-
- def set_fallback_encoding(self, encoding):
- """Set the fallback encoding, to be tried in 'replace' mode.
-
- ENCODING is the name of an encoding. If it is unknown, raise a
- LookupError."""
-
- if encoding is None:
- self.fallback_decoder = None
- else:
- self.fallback_decoder = (encoding, codecs.lookup(encoding)[1])
-
- def __call__(self, s):
- """Try to decode string S using our configured source encodings.
-
- Return the string as a Unicode string. If S is already a unicode
- string, do nothing.
-
- Raise UnicodeError if the string cannot be decoded using any of
- the source encodings and no fallback encoding was specified."""
-
- if isinstance(s, unicode):
- return s
- for (name, decoder) in self.decoders:
- try:
- return decoder(s)[0]
- except ValueError:
- Log().verbose("Encoding '%s' failed for string %r" % (name, s))
-
- if self.fallback_decoder is not None:
- (name, decoder) = self.fallback_decoder
- return decoder(s, 'replace')[0]
- else:
- raise UnicodeError
-
-
-class Timestamper:
- """Return monotonic timestamps derived from changeset timestamps."""
-
- def __init__(self):
- # The last timestamp that has been returned:
- self.timestamp = 0.0
-
- # The maximum timestamp that is considered reasonable:
- self.max_timestamp = time.time() + 24.0 * 60.0 * 60.0
-
- def get(self, timestamp, change_expected):
- """Return a reasonable timestamp derived from TIMESTAMP.
-
- Push TIMESTAMP into the future if necessary to ensure that it is
- at least one second later than every other timestamp that has been
- returned by previous calls to this method.
-
- If CHANGE_EXPECTED is not True, then log a message if the
- timestamp has to be changed."""
-
- if timestamp > self.max_timestamp:
- # If a timestamp is in the future, it is assumed that it is
- # bogus. Shift it backwards in time to prevent it forcing other
- # timestamps to be pushed even further in the future.
-
- # Note that this is not nearly a complete solution to the bogus
- # timestamp problem. A timestamp in the future still affects
- # the ordering of changesets, and a changeset having such a
- # timestamp will not be committed until all changesets with
- # earlier timestamps have been committed, even if other
- # changesets with even earlier timestamps depend on this one.
- self.timestamp = self.timestamp + 1.0
- if not change_expected:
- Log().warn(
- 'Timestamp "%s" is in the future; changed to "%s".'
- % (time.asctime(time.gmtime(timestamp)),
- time.asctime(time.gmtime(self.timestamp)),)
- )
- elif timestamp < self.timestamp + 1.0:
- self.timestamp = self.timestamp + 1.0
- if not change_expected and Log().is_on(Log.VERBOSE):
- Log().verbose(
- 'Timestamp "%s" adjusted to "%s" to ensure monotonicity.'
- % (time.asctime(time.gmtime(timestamp)),
- time.asctime(time.gmtime(self.timestamp)),)
- )
- else:
- self.timestamp = timestamp
-
- return self.timestamp
-
-
diff --git a/cvs2svn_lib/config.py b/cvs2svn_lib/config.py
deleted file mode 100644
index b313b2c..0000000
--- a/cvs2svn_lib/config.py
+++ /dev/null
@@ -1,221 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains various configuration constants used by cvs2svn."""
-
-
-SVN_KEYWORDS_VALUE = 'Author Date Id Revision'
-
-# The default names for the trunk/branches/tags directory for each
-# project:
-DEFAULT_TRUNK_BASE = 'trunk'
-DEFAULT_BRANCHES_BASE = 'branches'
-DEFAULT_TAGS_BASE = 'tags'
-
-SVNADMIN_EXECUTABLE = 'svnadmin'
-CO_EXECUTABLE = 'co'
-CVS_EXECUTABLE = 'cvs'
-SORT_EXECUTABLE = 'sort'
-
-# A pickled list of the projects defined for this conversion.
-PROJECTS = 'projects.pck'
-
-# A file holding the Serializer to be used for
-# CVS_REVS_SUMMARY_*_DATAFILE and CVS_SYMBOLS_SYMMARY_*_DATAFILE:
-SUMMARY_SERIALIZER = 'summary-serializer.pck'
-
-# The first file contains enough information about each CVSRevision to
-# deduce preliminary Changesets. The second file is a sorted version
-# of the first.
-CVS_REVS_SUMMARY_DATAFILE = 'revs-summary.txt'
-CVS_REVS_SUMMARY_SORTED_DATAFILE = 'revs-summary-s.txt'
-
-# The first file contains enough information about each CVSSymbol to
-# deduce preliminary Changesets. The second file is a sorted version
-# of the first.
-CVS_SYMBOLS_SUMMARY_DATAFILE = 'symbols-summary.txt'
-CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE = 'symbols-summary-s.txt'
-
-# A mapping from CVSItem id to Changeset id.
-CVS_ITEM_TO_CHANGESET = 'cvs-item-to-changeset.dat'
-
-# A mapping from CVSItem id to Changeset id, after the
-# RevisionChangeset loops have been broken.
-CVS_ITEM_TO_CHANGESET_REVBROKEN = 'cvs-item-to-changeset-revbroken.dat'
-
-# A mapping from CVSItem id to Changeset id, after the SymbolChangeset
-# loops have been broken.
-CVS_ITEM_TO_CHANGESET_SYMBROKEN = 'cvs-item-to-changeset-symbroken.dat'
-
-# A mapping from CVSItem id to Changeset id, after all Changeset
-# loops have been broken.
-CVS_ITEM_TO_CHANGESET_ALLBROKEN = 'cvs-item-to-changeset-allbroken.dat'
-
-# A mapping from id to Changeset.
-CHANGESETS_INDEX = 'changesets-index.dat'
-CHANGESETS_STORE = 'changesets.pck'
-
-# A mapping from id to Changeset, after the RevisionChangeset loops
-# have been broken.
-CHANGESETS_REVBROKEN_INDEX = 'changesets-revbroken-index.dat'
-CHANGESETS_REVBROKEN_STORE = 'changesets-revbroken.pck'
-
-# A mapping from id to Changeset, after the RevisionChangesets have
-# been sorted and converted into OrderedChangesets.
-CHANGESETS_REVSORTED_INDEX = 'changesets-revsorted-index.dat'
-CHANGESETS_REVSORTED_STORE = 'changesets-revsorted.pck'
-
-# A mapping from id to Changeset, after the SymbolChangeset loops have
-# been broken.
-CHANGESETS_SYMBROKEN_INDEX = 'changesets-symbroken-index.dat'
-CHANGESETS_SYMBROKEN_STORE = 'changesets-symbroken.pck'
-
-# A mapping from id to Changeset, after all Changeset loops have been
-# broken.
-CHANGESETS_ALLBROKEN_INDEX = 'changesets-allbroken-index.dat'
-CHANGESETS_ALLBROKEN_STORE = 'changesets-allbroken.pck'
-
-# The RevisionChangesets in commit order. Each line contains the
-# changeset id and timestamp of one changeset, in hexadecimal, in the
-# order that the changesets should be committed to svn.
-CHANGESETS_SORTED_DATAFILE = 'changesets-s.txt'
-
-# A file containing a marshalled copy of all the statistics that have
-# been gathered so far is written at the end of each pass as a
-# marshalled dictionary. This is the pattern used to generate the
-# filenames.
-STATISTICS_FILE = 'statistics-%02d.pck'
-
-# This text file contains records (1 per line) that describe openings
-# and closings for copies to tags and branches. The format is as
-# follows:
-#
-# SYMBOL_ID SVN_REVNUM TYPE CVS_SYMBOL_ID
-#
-# where type is either OPENING or CLOSING. CVS_SYMBOL_ID is the id of
-# the CVSSymbol whose opening or closing is being described (in hex).
-SYMBOL_OPENINGS_CLOSINGS = 'symbolic-names.txt'
-# A sorted version of the above file. SYMBOL_ID and SVN_REVNUM are
-# the primary and secondary sorting criteria. It is important that
-# SYMBOL_IDs be located together to make it quick to read them at
-# once. The order of SVN_REVNUM is only important because it is
-# assumed by some internal consistency checks.
-SYMBOL_OPENINGS_CLOSINGS_SORTED = 'symbolic-names-s.txt'
-
-# Skeleton version of the repository filesystem. See class
-# RepositoryMirror for how these work.
-MIRROR_NODES_INDEX_TABLE = 'mirror-nodes-index.dat'
-MIRROR_NODES_STORE = 'mirror-nodes.pck'
-
-# Offsets pointing to the beginning of each symbol's records in
-# SYMBOL_OPENINGS_CLOSINGS_SORTED. This file contains a pickled map
-# from symbol_id to file offset.
-SYMBOL_OFFSETS_DB = 'symbol-offsets.pck'
-
-# Pickled map of CVSFile.id to instance.
-CVS_FILES_DB = 'cvs-files.pck'
-
-# A series of records. The first is a pickled serializer. Each
-# subsequent record is a serialized list of all CVSItems applying to a
-# CVSFile.
-CVS_ITEMS_STORE = 'cvs-items.pck'
-
-# The same as above, but with the CVSItems ordered in groups based on
-# their initial changesets. CVSItems will usually be accessed one
-# changeset at a time, so this ordering helps disk locality (even
-# though some of the changesets will later be broken up).
-CVS_ITEMS_SORTED_INDEX_TABLE = 'cvs-items-sorted-index.dat'
-CVS_ITEMS_SORTED_STORE = 'cvs-items-sorted.pck'
-
-# A record of all symbolic names that will be processed in the
-# conversion. This file contains a pickled list of TypedSymbol
-# objects.
-SYMBOL_DB = 'symbols.pck'
-
-# A pickled list of the statistics for all symbols. Each entry in the
-# list is an instance of cvs2svn_lib.symbol_statistics._Stats.
-SYMBOL_STATISTICS = 'symbol-statistics.pck'
-
-# These two databases provide a bidirectional mapping between
-# CVSRevision.ids (in hex) and Subversion revision numbers.
-#
-# The first maps CVSRevision.id to the SVN revision number of which it
-# is a part (more than one CVSRevision can map to the same SVN
-# revision number).
-#
-# The second maps Subversion revision numbers (as hex strings) to
-# pickled SVNCommit instances.
-CVS_REVS_TO_SVN_REVNUMS = 'cvs-revs-to-svn-revnums.dat'
-
-# This database maps Subversion revision numbers to pickled SVNCommit
-# instances.
-SVN_COMMITS_INDEX_TABLE = 'svn-commits-index.dat'
-SVN_COMMITS_STORE = 'svn-commits.pck'
-
-# How many bytes to read at a time from a pipe. 128 kiB should be
-# large enough to be efficient without wasting too much memory.
-PIPE_READ_SIZE = 128 * 1024
-
-# Records the author and log message for each changeset. The database
-# contains a map metadata_id -> (author, logmessage). Each
-# CVSRevision that is eligible to be combined into the same SVN commit
-# is assigned the same id. Note that the (author, logmessage) pairs
-# are not necessarily all distinct; other data are taken into account
-# when constructing ids.
-METADATA_INDEX_TABLE = 'metadata-index.dat'
-METADATA_STORE = 'metadata.pck'
-
-# The same, after it has been cleaned up for the chosen output option:
-METADATA_CLEAN_INDEX_TABLE = 'metadata-clean-index.dat'
-METADATA_CLEAN_STORE = 'metadata-clean.pck'
-
-# The following four databases are used in conjunction with --use-internal-co.
-
-# Records the RCS deltas for all CVS revisions. The deltas are to be
-# applied forward, i.e. those from trunk are reversed wrt RCS.
-RCS_DELTAS_INDEX_TABLE = 'rcs-deltas-index.dat'
-RCS_DELTAS_STORE = 'rcs-deltas.pck'
-
-# Records the revision tree of each RCS file. The format is a list of
-# list of integers. The outer list holds lines of development, the inner list
-# revisions within the LODs, revisions are CVSItem ids. Branches "closer
-# to the trunk" appear later. Revisions are sorted by reverse chronological
-# order. The last revision of each branch is the revision it sprouts from.
-# Revisions that represent deletions at the end of a branch are omitted.
-RCS_TREES_INDEX_TABLE = 'rcs-trees-index.dat'
-RCS_TREES_STORE = 'rcs-trees.pck'
-
-# Records the revision tree of each RCS file after removing revisions
-# belonging to excluded branches. Note that the branch ordering is arbitrary
-# in this file.
-RCS_TREES_FILTERED_INDEX_TABLE = 'rcs-trees-filtered-index.dat'
-RCS_TREES_FILTERED_STORE = 'rcs-trees-filtered.pck'
-
-# At any given time during OutputPass, holds the full text of each CVS
-# revision that was checked out already and still has descendants that will
-# be checked out.
-CVS_CHECKOUT_DB = 'cvs-checkout.db'
-
-# End of DBs related to --use-internal-co.
-
-# If this run will output directly to a Subversion repository, then
-# this is the name of the file that each revision will temporarily be
-# written to prior to writing it into the repository.
-DUMPFILE = 'svn.dump'
-
-# flush a commit if a 5 minute gap occurs.
-COMMIT_THRESHOLD = 5 * 60
-
diff --git a/cvs2svn_lib/context.py b/cvs2svn_lib/context.py
deleted file mode 100644
index 89dc16a..0000000
--- a/cvs2svn_lib/context.py
+++ /dev/null
@@ -1,93 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""Store the context (options, etc) for a cvs2svn run."""
-
-
-import os
-
-from cvs2svn_lib import config
-from cvs2svn_lib.common import CVSTextDecoder
-
-
-class Ctx:
- """Session state for this run of cvs2svn. For example, run-time
- options are stored here. This class is a Borg (see
- http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531)."""
-
- __shared_state = { }
-
- def __init__(self):
- self.__dict__ = self.__shared_state
- if self.__dict__:
- return
- # Else, initialize to defaults.
- self.set_defaults()
-
- def set_defaults(self):
- """Set all parameters to their default values."""
-
- self.output_option = None
- self.dry_run = False
- self.revision_recorder = None
- self.revision_excluder = None
- self.revision_reader = None
- self.svnadmin_executable = config.SVNADMIN_EXECUTABLE
- self.sort_executable = config.SORT_EXECUTABLE
- self.trunk_only = False
- self.prune = True
- self.cvs_author_decoder = CVSTextDecoder(['ascii'])
- self.cvs_log_decoder = CVSTextDecoder(['ascii'])
- self.cvs_filename_decoder = CVSTextDecoder(['ascii'])
- self.decode_apple_single = False
- self.symbol_info_filename = None
- self.username = None
- self.svn_property_setters = []
- self.tmpdir = 'cvs2svn-tmp'
- self.skip_cleanup = False
- self.keep_cvsignore = False
- self.cross_project_commits = True
- self.cross_branch_commits = True
- self.retain_conflicting_attic_files = False
-
- self.initial_project_commit_message = (
- 'Standard project directories initialized by cvs2svn.'
- )
- self.post_commit_message = (
- 'This commit was generated by cvs2svn to compensate for '
- 'changes in r%(revnum)d, which included commits to RCS files '
- 'with non-trunk default branches.'
- )
- self.symbol_commit_message = (
- "This commit was manufactured by cvs2svn to create %(symbol_type)s "
- "'%(symbol_name)s'."
- )
-
-
- def get_temp_filename(self, basename):
- return os.path.join(self.tmpdir, basename)
-
- def clean(self):
- """Dispose of items in our dictionary that are not intended to
- live past the end of a pass (identified by exactly one leading
- underscore)."""
-
- for attr in self.__dict__.keys():
- if (attr.startswith('_') and not attr.startswith('__')
- and not attr.startswith('_Ctx__')):
- delattr(self, attr)
-
-
diff --git a/cvs2svn_lib/cvs_file.py b/cvs2svn_lib/cvs_file.py
deleted file mode 100644
index 3a1bb4f..0000000
--- a/cvs2svn_lib/cvs_file.py
+++ /dev/null
@@ -1,287 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains a class to store information about a CVS file."""
-
-import os
-
-from cvs2svn_lib.common import path_join
-from cvs2svn_lib.context import Ctx
-
-
-class CVSPath(object):
- """Represent a CVS file or directory.
-
- Members:
-
- id -- (int) unique ID for this CVSPath. At any moment, there is
- at most one CVSPath instance with a particular ID. (This
- means that object identity is the same as object equality, and
- objects can be used as map keys even though they don't have a
- __hash__() method).
-
- project -- (Project) the project containing this CVSPath.
-
- parent_directory -- (CVSDirectory or None) the CVSDirectory
- containing this CVSPath.
-
- basename -- (string) the base name of this CVSPath (no ',v'). The
- basename of the root directory of a project is ''.
-
- ordinal -- (int) the order that this instance should be sorted
- relative to other CVSPath instances. This member is set based
- on the ordering imposed by slow_compare() by CollectData after
- all CVSFiles have been processed. Comparisons of CVSPath
- using __cmp__() simply compare the ordinals.
-
- """
-
- __slots__ = [
- 'id',
- 'project',
- 'parent_directory',
- 'basename',
- 'ordinal',
- ]
-
- def __init__(self, id, project, parent_directory, basename):
- self.id = id
- self.project = project
- self.parent_directory = parent_directory
- self.basename = basename
-
- def __getstate__(self):
- """This method must only be called after ordinal has been set."""
-
- return (
- self.id, self.project.id,
- self.parent_directory, self.basename,
- self.ordinal,
- )
-
- def __setstate__(self, state):
- (
- self.id, project_id,
- self.parent_directory, self.basename,
- self.ordinal,
- ) = state
- self.project = Ctx()._projects[project_id]
-
- def get_ancestry(self):
- """Return a list of the CVSPaths leading from the root path to SELF.
-
- Return the CVSPaths in a list, starting with
- self.project.get_root_cvs_directory() and ending with self."""
-
- ancestry = []
- p = self
- while p is not None:
- ancestry.append(p)
- p = p.parent_directory
-
- ancestry.reverse()
- return ancestry
-
- def get_cvs_path(self):
- """Return the canonical path within the Project.
-
- The canonical path:
-
- - Uses forward slashes
-
- - Doesn't include ',v' for files
-
- - This doesn't include the 'Attic' segment of the path unless the
- file is to be left in an Attic directory in the SVN repository;
- i.e., if a filename exists in and out of Attic and the
- --retain-conflicting-attic-files option was specified.
-
- """
-
- return path_join(*[p.basename for p in self.get_ancestry()[1:]])
-
- cvs_path = property(get_cvs_path)
-
- def _get_dir_components(self):
- """Return a list containing the components of the path leading to SELF.
-
- The return value contains the base names of all of the parent
- directories (except for the root directory) and SELF."""
-
- return [p.basename for p in self.get_ancestry()[1:]]
-
- def __eq__(a, b):
- """Compare two CVSPath instances for equality.
-
- This method is supplied to avoid using __cmp__() for comparing for
- equality."""
-
- return a is b
-
- def slow_compare(a, b):
- return (
- # Sort first by project:
- cmp(a.project, b.project)
- # Then by directory components:
- or cmp(a._get_dir_components(), b._get_dir_components())
- )
-
- def __cmp__(a, b):
- """This method must only be called after ordinal has been set."""
-
- return cmp(a.ordinal, b.ordinal)
-
-
-class CVSDirectory(CVSPath):
- """Represent a CVS directory.
-
- Members:
-
- id -- (int or None) unique id for this file. If None, a new id is
- generated.
-
- project -- (Project) the project containing this file.
-
- parent_directory -- (CVSDirectory or None) the CVSDirectory
- containing this CVSDirectory.
-
- basename -- (string) the base name of this CVSDirectory (no ',v').
-
- """
-
- __slots__ = []
-
- def __init__(self, id, project, parent_directory, basename):
- """Initialize a new CVSDirectory object."""
-
- CVSPath.__init__(self, id, project, parent_directory, basename)
-
- def get_filename(self):
- """Return the filesystem path to this CVSPath in the CVS repository."""
-
- if self.parent_directory is None:
- return self.project.project_cvs_repos_path
- else:
- return os.path.join(
- self.parent_directory.get_filename(), self.basename
- )
-
- filename = property(get_filename)
-
- def __getstate__(self):
- return CVSPath.__getstate__(self)
-
- def __setstate__(self, state):
- CVSPath.__setstate__(self, state)
-
- def __str__(self):
- """For convenience only. The format is subject to change at any time."""
-
- return self.cvs_path + '/'
-
- def __repr__(self):
- return 'CVSDirectory<%x>(%r)' % (self.id, str(self),)
-
-
-class CVSFile(CVSPath):
- """Represent a CVS file.
-
- Members:
-
- id -- (int) unique id for this file.
-
- project -- (Project) the project containing this file.
-
- parent_directory -- (CVSDirectory) the CVSDirectory containing
- this CVSFile.
-
- basename -- (string) the base name of this CVSFile (no ',v').
-
- _in_attic -- (bool) True if RCS file is in an Attic subdirectory
- that is not considered the parent directory. (If a file is
- in-and-out-of-attic and one copy is to be left in Attic after
- the conversion, then the Attic directory is that file's
- PARENT_DIRECTORY and _IN_ATTIC is False.)
-
- executable -- (bool) True iff RCS file has executable bit set.
-
- file_size -- (long) size of the RCS file in bytes.
-
- mode -- (string or None) 'kkv', 'kb', etc.
-
- PARENT_DIRECTORY might contain an 'Attic' component if it should be
- retained in the SVN repository; i.e., if the same filename exists out
- of Attic and the --retain-conflicting-attic-files option was specified.
-
- """
-
- __slots__ = [
- '_in_attic',
- 'executable',
- 'file_size',
- 'mode',
- ]
-
- def __init__(
- self, id, project, parent_directory, basename, in_attic,
- executable, file_size, mode
- ):
- """Initialize a new CVSFile object."""
-
- CVSPath.__init__(self, id, project, parent_directory, basename)
- self._in_attic = in_attic
- self.executable = executable
- self.file_size = file_size
- self.mode = mode
-
- assert self.parent_directory is not None
-
- def get_filename(self):
- """Return the filesystem path to this CVSPath in the CVS repository."""
-
- if self._in_attic:
- return os.path.join(
- self.parent_directory.filename, 'Attic', self.basename + ',v'
- )
- else:
- return os.path.join(
- self.parent_directory.filename, self.basename + ',v'
- )
-
- filename = property(get_filename)
-
- def __getstate__(self):
- return (
- CVSPath.__getstate__(self),
- self._in_attic, self.executable, self.file_size, self.mode,
- )
-
- def __setstate__(self, state):
- (
- cvs_path_state,
- self._in_attic, self.executable, self.file_size, self.mode,
- ) = state
- CVSPath.__setstate__(self, cvs_path_state)
-
- def __str__(self):
- """For convenience only. The format is subject to change at any time."""
-
- return self.cvs_path
-
- def __repr__(self):
- return 'CVSFile<%x>(%r)' % (self.id, str(self),)
-
-
diff --git a/cvs2svn_lib/cvs_file_database.py b/cvs2svn_lib/cvs_file_database.py
deleted file mode 100644
index 61eebf3..0000000
--- a/cvs2svn_lib/cvs_file_database.py
+++ /dev/null
@@ -1,75 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains database facilities used by cvs2svn."""
-
-
-import cPickle
-
-from cvs2svn_lib import config
-from cvs2svn_lib.common import DB_OPEN_READ
-from cvs2svn_lib.common import DB_OPEN_NEW
-from cvs2svn_lib.artifact_manager import artifact_manager
-
-
-class CVSFileDatabase:
- """A database to store CVSFile objects and retrieve them by their id."""
-
- def __init__(self, mode):
- """Initialize an instance, opening database in MODE (where MODE is
- either DB_OPEN_NEW or DB_OPEN_READ)."""
-
- self.mode = mode
-
- # A map { id : CVSFile }
- self._cvs_files = {}
-
- if self.mode == DB_OPEN_NEW:
- pass
- elif self.mode == DB_OPEN_READ:
- f = open(artifact_manager.get_temp_file(config.CVS_FILES_DB), 'rb')
- cvs_files = cPickle.load(f)
- for cvs_file in cvs_files:
- self._cvs_files[cvs_file.id] = cvs_file
- else:
- raise RuntimeError('Invalid mode %r' % self.mode)
-
- def log_file(self, cvs_file):
- """Add CVS_FILE, a CVSFile instance, to the database."""
-
- if self.mode == DB_OPEN_READ:
- raise RuntimeError('Cannot write items in mode %r' % self.mode)
-
- self._cvs_files[cvs_file.id] = cvs_file
-
- def itervalues(self):
- for value in self._cvs_files.itervalues():
- yield value
-
- def get_file(self, id):
- """Return the CVSFile with the specified ID."""
-
- return self._cvs_files[id]
-
- def close(self):
- if self.mode == DB_OPEN_NEW:
- f = open(artifact_manager.get_temp_file(config.CVS_FILES_DB), 'wb')
- cPickle.dump(self._cvs_files.values(), f, -1)
- f.close()
-
- self._cvs_files = None
-
-
diff --git a/cvs2svn_lib/cvs_file_items.py b/cvs2svn_lib/cvs_file_items.py
deleted file mode 100644
index f0dc782..0000000
--- a/cvs2svn_lib/cvs_file_items.py
+++ /dev/null
@@ -1,1075 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2006-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains a class to manage the CVSItems related to one file."""
-
-
-import re
-
-from cvs2svn_lib.common import InternalError
-from cvs2svn_lib.common import FatalError
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.log import Log
-from cvs2svn_lib.symbol import Trunk
-from cvs2svn_lib.symbol import Branch
-from cvs2svn_lib.symbol import Tag
-from cvs2svn_lib.symbol import ExcludedSymbol
-from cvs2svn_lib.cvs_item import CVSRevision
-from cvs2svn_lib.cvs_item import CVSRevisionModification
-from cvs2svn_lib.cvs_item import CVSRevisionAbsent
-from cvs2svn_lib.cvs_item import CVSRevisionNoop
-from cvs2svn_lib.cvs_item import CVSSymbol
-from cvs2svn_lib.cvs_item import CVSBranch
-from cvs2svn_lib.cvs_item import CVSTag
-from cvs2svn_lib.cvs_item import cvs_revision_type_map
-from cvs2svn_lib.cvs_item import cvs_branch_type_map
-from cvs2svn_lib.cvs_item import cvs_tag_type_map
-
-
-class VendorBranchError(Exception):
- """There is an error in the structure of the file revision tree."""
-
- pass
-
-
-class LODItems(object):
- def __init__(self, lod, cvs_branch, cvs_revisions, cvs_branches, cvs_tags):
- # The LineOfDevelopment described by this instance.
- self.lod = lod
-
- # The CVSBranch starting this LOD, if any; otherwise, None.
- self.cvs_branch = cvs_branch
-
- # The list of CVSRevisions on this LOD, if any. The CVSRevisions
- # are listed in dependency order.
- self.cvs_revisions = cvs_revisions
-
- # A list of CVSBranches that sprout from this LOD (either from
- # cvs_branch or from one of the CVSRevisions).
- self.cvs_branches = cvs_branches
-
- # A list of CVSTags that sprout from this LOD (either from
- # cvs_branch or from one of the CVSRevisions).
- self.cvs_tags = cvs_tags
-
- def is_trivial_import(self):
- """Return True iff this LOD is a trivial import branch in this file.
-
- A trivial import branch is a branch that was used for a single
- import and nothing else. Such a branch is eligible for being
- grafted onto trunk, even if it has branch blockers."""
-
- return (
- len(self.cvs_revisions) == 1
- and self.cvs_revisions[0].ntdbr
- )
-
- def is_pure_ntdb(self):
- """Return True iff this LOD is a pure NTDB in this file.
-
- A pure non-trunk default branch is defined to be a branch that
- contains only NTDB revisions (and at least one of them). Such a
- branch is eligible for being grafted onto trunk, even if it has
- branch blockers."""
-
- return (
- self.cvs_revisions
- and self.cvs_revisions[-1].ntdbr
- )
-
- def iter_blockers(self):
- if self.is_pure_ntdb():
- # Such a branch has no blockers, because the blockers can be
- # grafted to trunk.
- pass
- else:
- # Other branches are only blocked by symbols that sprout from
- # non-NTDB revisions:
- non_ntdbr_revision_ids = set()
- for cvs_revision in self.cvs_revisions:
- if not cvs_revision.ntdbr:
- non_ntdbr_revision_ids.add(cvs_revision.id)
-
- for cvs_tag in self.cvs_tags:
- if cvs_tag.source_id in non_ntdbr_revision_ids:
- yield cvs_tag
-
- for cvs_branch in self.cvs_branches:
- if cvs_branch.source_id in non_ntdbr_revision_ids:
- yield cvs_branch
-
-
-class CVSFileItems(object):
- def __init__(self, cvs_file, trunk, cvs_items):
- # The file whose data this instance holds.
- self.cvs_file = cvs_file
-
- # The symbol that represents "Trunk" in this file.
- self.trunk = trunk
-
- # A map from CVSItem.id to CVSItem:
- self._cvs_items = {}
-
- # The cvs_item_id of each root in the CVSItem forest. (A root is
- # defined to be any CVSRevision with no prev_id.)
- self.root_ids = set()
-
- for cvs_item in cvs_items:
- self.add(cvs_item)
- if isinstance(cvs_item, CVSRevision) and cvs_item.prev_id is None:
- self.root_ids.add(cvs_item.id)
-
- def __getstate__(self):
- return (self.cvs_file.id, self.values(),)
-
- def __setstate__(self, state):
- (cvs_file_id, cvs_items,) = state
- cvs_file = Ctx()._cvs_file_db.get_file(cvs_file_id)
- CVSFileItems.__init__(
- self, cvs_file, cvs_file.project.get_trunk(), cvs_items,
- )
-
- def add(self, cvs_item):
- self._cvs_items[cvs_item.id] = cvs_item
-
- def __getitem__(self, id):
- """Return the CVSItem with the specified ID."""
-
- return self._cvs_items[id]
-
- def get(self, id, default=None):
- return self._cvs_items.get(id, default)
-
- def __delitem__(self, id):
- assert id not in self.root_ids
- del self._cvs_items[id]
-
- def values(self):
- return self._cvs_items.values()
-
- def check_link_consistency(self):
- """Check that the CVSItems are linked correctly with each other."""
-
- for cvs_item in self.values():
- try:
- cvs_item.check_links(self)
- except AssertionError:
- Log().error(
- 'Link consistency error in %s\n'
- 'This is probably a bug internal to cvs2svn. Please file a bug\n'
- 'report including the following stack trace (see FAQ for more '
- 'info).'
- % (cvs_item,))
- raise
-
- def _get_lod(self, lod, cvs_branch, start_id):
- """Return the indicated LODItems.
-
- LOD is the corresponding LineOfDevelopment. CVS_BRANCH is the
- CVSBranch instance that starts the LOD if any; otherwise it is
- None. START_ID is the id of the first CVSRevision on this LOD, or
- None if there are none."""
-
- cvs_revisions = []
- cvs_branches = []
- cvs_tags = []
-
- def process_subitems(cvs_item):
- """Process the branches and tags that are rooted in CVS_ITEM.
-
- CVS_ITEM can be a CVSRevision or a CVSBranch."""
-
- for branch_id in cvs_item.branch_ids[:]:
- cvs_branches.append(self[branch_id])
-
- for tag_id in cvs_item.tag_ids:
- cvs_tags.append(self[tag_id])
-
- if cvs_branch is not None:
- # Include the symbols sprouting directly from the CVSBranch:
- process_subitems(cvs_branch)
-
- id = start_id
- while id is not None:
- cvs_rev = self[id]
- cvs_revisions.append(cvs_rev)
- process_subitems(cvs_rev)
- id = cvs_rev.next_id
-
- return LODItems(lod, cvs_branch, cvs_revisions, cvs_branches, cvs_tags)
-
- def get_lod_items(self, cvs_branch):
- """Return an LODItems describing the branch that starts at CVS_BRANCH.
-
- CVS_BRANCH must be an instance of CVSBranch contained in this
- CVSFileItems."""
-
- return self._get_lod(cvs_branch.symbol, cvs_branch, cvs_branch.next_id)
-
- def iter_root_lods(self):
- """Iterate over the LODItems for all root LODs (non-recursively)."""
-
- for id in list(self.root_ids):
- cvs_item = self[id]
- if isinstance(cvs_item, CVSRevision):
- # This LOD doesn't have a CVSBranch associated with it.
- # Either it is Trunk, or it is a branch whose CVSBranch has
- # been deleted.
- yield self._get_lod(cvs_item.lod, None, id)
- elif isinstance(cvs_item, CVSBranch):
- # This is a Branch that has been severed from the rest of the
- # tree.
- yield self._get_lod(cvs_item.symbol, cvs_item, cvs_item.next_id)
- else:
- raise InternalError('Unexpected root item: %s' % (cvs_item,))
-
- def _iter_tree(self, lod, cvs_branch, start_id):
- """Iterate over the tree that starts at the specified line of development.
-
- LOD is the LineOfDevelopment where the iteration should start.
- CVS_BRANCH is the CVSBranch instance that starts the LOD if any;
- otherwise it is None. ID is the id of the first CVSRevision on
- this LOD, or None if there are none.
-
- There are two cases handled by this routine: trunk (where LOD is a
- Trunk instance, CVS_BRANCH is None, and ID is the id of the 1.1
- revision) and a branch (where LOD is a Branch instance, CVS_BRANCH
- is a CVSBranch instance, and ID is either the id of the first
- CVSRevision on the branch or None if there are no CVSRevisions on
- the branch). Note that CVS_BRANCH and ID cannot simultaneously be
- None.
-
- Yield an LODItems instance for each line of development."""
-
- cvs_revisions = []
- cvs_branches = []
- cvs_tags = []
-
- def process_subitems(cvs_item):
- """Process the branches and tags that are rooted in CVS_ITEM.
-
- CVS_ITEM can be a CVSRevision or a CVSBranch."""
-
- for branch_id in cvs_item.branch_ids[:]:
- # Recurse into the branch:
- branch = self[branch_id]
- for lod_items in self._iter_tree(
- branch.symbol, branch, branch.next_id
- ):
- yield lod_items
- # The caller might have deleted the branch that we just
- # yielded. If it is no longer present, then do not add it to
- # the list of cvs_branches.
- try:
- cvs_branches.append(self[branch_id])
- except KeyError:
- pass
-
- for tag_id in cvs_item.tag_ids:
- cvs_tags.append(self[tag_id])
-
- if cvs_branch is not None:
- # Include the symbols sprouting directly from the CVSBranch:
- for lod_items in process_subitems(cvs_branch):
- yield lod_items
-
- id = start_id
- while id is not None:
- cvs_rev = self[id]
- cvs_revisions.append(cvs_rev)
-
- for lod_items in process_subitems(cvs_rev):
- yield lod_items
-
- id = cvs_rev.next_id
-
- yield LODItems(lod, cvs_branch, cvs_revisions, cvs_branches, cvs_tags)
-
- def iter_lods(self):
- """Iterate over LinesOfDevelopment in this file, in depth-first order.
-
- For each LOD, yield an LODItems instance. The traversal starts at
- each root node but returns the LODs in depth-first order.
-
- It is allowed to modify the CVSFileItems instance while the
- traversal is occurring, but only in ways that don't affect the
- tree structure above (i.e., towards the trunk from) the current
- LOD."""
-
- # Make a list out of root_ids so that callers can change it:
- for id in list(self.root_ids):
- cvs_item = self[id]
- if isinstance(cvs_item, CVSRevision):
- # This LOD doesn't have a CVSBranch associated with it.
- # Either it is Trunk, or it is a branch whose CVSBranch has
- # been deleted.
- lod = cvs_item.lod
- cvs_branch = None
- elif isinstance(cvs_item, CVSBranch):
- # This is a Branch that has been severed from the rest of the
- # tree.
- lod = cvs_item.symbol
- id = cvs_item.next_id
- cvs_branch = cvs_item
- else:
- raise InternalError('Unexpected root item: %s' % (cvs_item,))
-
- for lod_items in self._iter_tree(lod, cvs_branch, id):
- yield lod_items
-
- def iter_deltatext_ancestors(self, cvs_rev):
- """Generate the delta-dependency ancestors of CVS_REV.
-
- Generate then ancestors of CVS_REV in deltatext order; i.e., back
- along branches towards trunk, then outwards along trunk towards
- HEAD."""
-
- while True:
- # Determine the next candidate source revision:
- if isinstance(cvs_rev.lod, Trunk):
- if cvs_rev.next_id is None:
- # HEAD has no ancestors, so we are done:
- return
- else:
- cvs_rev = self[cvs_rev.next_id]
- else:
- cvs_rev = self[cvs_rev.prev_id]
-
- yield cvs_rev
-
- def _sever_branch(self, lod_items):
- """Sever the branch from its source and discard the CVSBranch.
-
- LOD_ITEMS describes a branch that should be severed from its
- source, deleting the CVSBranch and creating a new root. Also set
- LOD_ITEMS.cvs_branch to none.
-
- This method can only be used before symbols have been grafted onto
- CVSBranches. It does not adjust NTDBR, NTDBR_PREV_ID or
- NTDBR_NEXT_ID even if LOD_ITEMS describes a NTDB."""
-
- cvs_branch = lod_items.cvs_branch
- assert cvs_branch is not None
- assert not cvs_branch.tag_ids
- assert not cvs_branch.branch_ids
- source_rev = self[cvs_branch.source_id]
-
- # We only cover the following case, even though after
- # FilterSymbolsPass cvs_branch.source_id might refer to another
- # CVSBranch.
- assert isinstance(source_rev, CVSRevision)
-
- # Delete the CVSBranch itself:
- lod_items.cvs_branch = None
- del self[cvs_branch.id]
-
- # Delete the reference from the source revision to the CVSBranch:
- source_rev.branch_ids.remove(cvs_branch.id)
-
- # Delete the reference from the first revision on the branch to
- # the CVSBranch:
- if lod_items.cvs_revisions:
- first_rev = lod_items.cvs_revisions[0]
-
- # Delete the reference from first_rev to the CVSBranch:
- first_rev.first_on_branch_id = None
-
- # Delete the reference from the source revision to the first
- # revision on the branch:
- source_rev.branch_commit_ids.remove(first_rev.id)
-
- # ...and vice versa:
- first_rev.prev_id = None
-
- # Change the type of first_rev (e.g., from Change to Add):
- first_rev.__class__ = cvs_revision_type_map[
- (isinstance(first_rev, CVSRevisionModification), False,)
- ]
-
- # Now first_rev is a new root:
- self.root_ids.add(first_rev.id)
-
- def adjust_ntdbrs(self, ntdbr_cvs_revs):
- """Adjust the specified non-trunk default branch revisions.
-
- NTDBR_CVS_REVS is a list of CVSRevision instances in this file
- that have been determined to be non-trunk default branch
- revisions.
-
- The first revision on the default branch is handled strangely by
- CVS. If a file is imported (as opposed to being added), CVS
- creates a 1.1 revision, then creates a vendor branch 1.1.1 based
- on 1.1, then creates a 1.1.1.1 revision that is identical to the
- 1.1 revision (i.e., its deltatext is empty). The log message that
- the user typed when importing is stored with the 1.1.1.1 revision.
- The 1.1 revision always contains a standard, generated log
- message, 'Initial revision\n'.
-
- When we detect a straightforward import like this, we want to
- handle it by deleting the 1.1 revision (which doesn't contain any
- useful information) and making 1.1.1.1 into an independent root in
- the file's dependency tree. In SVN, 1.1.1.1 will be added
- directly to the vendor branch with its initial content. Then in a
- special 'post-commit', the 1.1.1.1 revision is copied back to
- trunk.
-
- If the user imports again to the same vendor branch, then CVS
- creates revisions 1.1.1.2, 1.1.1.3, etc. on the vendor branch,
- *without* counterparts in trunk (even though these revisions
- effectively play the role of trunk revisions). So after we add
- such revisions to the vendor branch, we also copy them back to
- trunk in post-commits.
-
- Set the ntdbr members of the revisions listed in NTDBR_CVS_REVS to
- True. Also, if there is a 1.2 revision, then set that revision to
- depend on the last non-trunk default branch revision and possibly
- adjust its type accordingly."""
-
- for cvs_rev in ntdbr_cvs_revs:
- cvs_rev.ntdbr = True
-
- # Look for a 1.2 revision:
- rev_1_1 = self[ntdbr_cvs_revs[0].prev_id]
-
- rev_1_2 = self.get(rev_1_1.next_id)
- if rev_1_2 is not None:
- # Revision 1.2 logically follows the imported revisions, not
- # 1.1. Accordingly, connect it to the last NTDBR and possibly
- # change its type.
- last_ntdbr = ntdbr_cvs_revs[-1]
- rev_1_2.ntdbr_prev_id = last_ntdbr.id
- last_ntdbr.ntdbr_next_id = rev_1_2.id
- rev_1_2.__class__ = cvs_revision_type_map[(
- isinstance(rev_1_2, CVSRevisionModification),
- isinstance(last_ntdbr, CVSRevisionModification),
- )]
-
- def process_live_ntdb(self, vendor_lod_items):
- """VENDOR_LOD_ITEMS is a live default branch; process it.
-
- In this case, all revisions on the default branch are NTDBRs and
- it is an error if there is also a '1.2' revision.
-
- Return True iff this transformation really does something. Raise
- a VendorBranchError if there is a '1.2' revision."""
-
- rev_1_1 = self[vendor_lod_items.cvs_branch.source_id]
- rev_1_2_id = rev_1_1.next_id
- if rev_1_2_id is not None:
- raise VendorBranchError(
- 'File \'%s\' has default branch=%s but also a revision %s'
- % (self.cvs_file.filename,
- vendor_lod_items.cvs_branch.branch_number, self[rev_1_2_id].rev,)
- )
-
- ntdbr_cvs_revs = list(vendor_lod_items.cvs_revisions)
-
- if ntdbr_cvs_revs:
- self.adjust_ntdbrs(ntdbr_cvs_revs)
- return True
- else:
- return False
-
- def process_historical_ntdb(self, vendor_lod_items):
- """There appears to have been a non-trunk default branch in the past.
-
- There is currently no default branch, but the branch described by
- file appears to have been imported. So our educated guess is that
- all revisions on the '1.1.1' branch (described by
- VENDOR_LOD_ITEMS) with timestamps prior to the timestamp of '1.2'
- were non-trunk default branch revisions.
-
- Return True iff this transformation really does something.
-
- This really only handles standard '1.1.1.*'-style vendor
- revisions. One could conceivably have a file whose default branch
- is 1.1.3 or whatever, or was that at some point in time, with
- vendor revisions 1.1.3.1, 1.1.3.2, etc. But with the default
- branch gone now, we'd have no basis for assuming that the
- non-standard vendor branch had ever been the default branch
- anyway.
-
- Note that we rely on comparisons between the timestamps of the
- revisions on the vendor branch and that of revision 1.2, even
- though the timestamps might be incorrect due to clock skew. We
- could do a slightly better job if we used the changeset
- timestamps, as it is possible that the dependencies that went into
- determining those timestamps are more accurate. But that would
- require an extra pass or two."""
-
- rev_1_1 = self[vendor_lod_items.cvs_branch.source_id]
- rev_1_2_id = rev_1_1.next_id
-
- if rev_1_2_id is None:
- rev_1_2_timestamp = None
- else:
- rev_1_2_timestamp = self[rev_1_2_id].timestamp
-
- ntdbr_cvs_revs = []
- for cvs_rev in vendor_lod_items.cvs_revisions:
- if rev_1_2_timestamp is not None \
- and cvs_rev.timestamp >= rev_1_2_timestamp:
- # That's the end of the once-default branch.
- break
- ntdbr_cvs_revs.append(cvs_rev)
-
- if ntdbr_cvs_revs:
- self.adjust_ntdbrs(ntdbr_cvs_revs)
- return True
- else:
- return False
-
- def imported_remove_1_1(self, vendor_lod_items):
- """This file was imported. Remove the 1.1 revision if possible.
-
- VENDOR_LOD_ITEMS is the LODItems instance for the vendor branch.
- See adjust_ntdbrs() for more information."""
-
- assert vendor_lod_items.cvs_revisions
- cvs_rev = vendor_lod_items.cvs_revisions[0]
-
- if isinstance(cvs_rev, CVSRevisionModification) \
- and not cvs_rev.deltatext_exists:
- cvs_branch = vendor_lod_items.cvs_branch
- rev_1_1 = self[cvs_branch.source_id]
- assert isinstance(rev_1_1, CVSRevision)
- Log().debug('Removing unnecessary revision %s' % (rev_1_1,))
-
- # Delete the 1.1.1 CVSBranch and sever the vendor branch from trunk:
- self._sever_branch(vendor_lod_items)
-
- # Delete rev_1_1:
- self.root_ids.remove(rev_1_1.id)
- del self[rev_1_1.id]
- rev_1_2_id = rev_1_1.next_id
- if rev_1_2_id is not None:
- rev_1_2 = self[rev_1_2_id]
- rev_1_2.prev_id = None
- self.root_ids.add(rev_1_2.id)
-
- # Move any tags and branches from rev_1_1 to cvs_rev:
- cvs_rev.tag_ids.extend(rev_1_1.tag_ids)
- for id in rev_1_1.tag_ids:
- cvs_tag = self[id]
- cvs_tag.source_lod = cvs_rev.lod
- cvs_tag.source_id = cvs_rev.id
- cvs_rev.branch_ids[0:0] = rev_1_1.branch_ids
- for id in rev_1_1.branch_ids:
- cvs_branch = self[id]
- cvs_branch.source_lod = cvs_rev.lod
- cvs_branch.source_id = cvs_rev.id
- cvs_rev.branch_commit_ids[0:0] = rev_1_1.branch_commit_ids
- for id in rev_1_1.branch_commit_ids:
- cvs_rev2 = self[id]
- cvs_rev2.prev_id = cvs_rev.id
-
- def _delete_unneeded(self, cvs_item, metadata_db):
- if isinstance(cvs_item, CVSRevisionNoop) \
- and cvs_item.rev == '1.1' \
- and isinstance(cvs_item.lod, Trunk) \
- and len(cvs_item.branch_ids) >= 1 \
- and self[cvs_item.branch_ids[0]].next_id is not None \
- and not cvs_item.closed_symbols \
- and not cvs_item.ntdbr:
- # FIXME: This message will not match if the RCS file was renamed
- # manually after it was created.
- log_msg = metadata_db[cvs_item.metadata_id].log_msg
- cvs_generated_msg = 'file %s was initially added on branch %s.\n' % (
- self.cvs_file.basename,
- self[cvs_item.branch_ids[0]].symbol.name,)
- return log_msg == cvs_generated_msg
- else:
- return False
-
- def remove_unneeded_deletes(self, metadata_db):
- """Remove unneeded deletes for this file.
-
- If a file is added on a branch, then a trunk revision is added at
- the same time in the 'Dead' state. This revision doesn't do
- anything useful, so delete it."""
-
- for id in self.root_ids:
- cvs_item = self[id]
- if self._delete_unneeded(cvs_item, metadata_db):
- Log().debug('Removing unnecessary delete %s' % (cvs_item,))
-
- # Delete cvs_item:
- self.root_ids.remove(cvs_item.id)
- del self[id]
- if cvs_item.next_id is not None:
- cvs_rev_next = self[cvs_item.next_id]
- cvs_rev_next.prev_id = None
- self.root_ids.add(cvs_rev_next.id)
-
- # Delete all CVSBranches rooted at this revision. If there is
- # a CVSRevision on the branch, it should already be an add so
- # it doesn't have to be changed.
- for cvs_branch_id in cvs_item.branch_ids:
- cvs_branch = self[cvs_branch_id]
- del self[cvs_branch.id]
-
- if cvs_branch.next_id is not None:
- cvs_branch_next = self[cvs_branch.next_id]
- cvs_branch_next.first_on_branch_id = None
- cvs_branch_next.prev_id = None
- self.root_ids.add(cvs_branch_next.id)
-
- # Tagging a dead revision doesn't do anything, so remove any
- # tags that were set on 1.1:
- for cvs_tag_id in cvs_item.tag_ids:
- del self[cvs_tag_id]
-
- # This can only happen once per file, and we might have just
- # changed self.root_ids, so break out of the loop:
- break
-
- def _initial_branch_delete_unneeded(self, lod_items, metadata_db):
- """Return True iff the initial revision in LOD_ITEMS can be deleted."""
-
- if lod_items.cvs_branch is not None \
- and lod_items.cvs_branch.source_id is not None \
- and len(lod_items.cvs_revisions) >= 2:
- cvs_revision = lod_items.cvs_revisions[0]
- cvs_rev_source = self[lod_items.cvs_branch.source_id]
- if isinstance(cvs_revision, CVSRevisionAbsent) \
- and not cvs_revision.tag_ids \
- and not cvs_revision.branch_ids \
- and abs(cvs_revision.timestamp - cvs_rev_source.timestamp) <= 2:
- # FIXME: This message will not match if the RCS file was renamed
- # manually after it was created.
- log_msg = metadata_db[cvs_revision.metadata_id].log_msg
- return bool(re.match(
- r'file %s was added on branch .* on '
- r'\d{4}\-\d{2}\-\d{2} \d{2}\:\d{2}\:\d{2}( [\+\-]\d{4})?'
- '\n' % (re.escape(self.cvs_file.basename),),
- log_msg,
- ))
- return False
-
- def remove_initial_branch_deletes(self, metadata_db):
- """If the first revision on a branch is an unnecessary delete, remove it.
-
- If a file is added on a branch (whether or not it already existed
- on trunk), then new versions of CVS add a first branch revision in
- the 'dead' state (to indicate that the file did not exist on the
- branch when the branch was created) followed by the second branch
- revision, which is an add. When we encounter this situation, we
- sever the branch from trunk and delete the first branch
- revision."""
-
- for lod_items in self.iter_lods():
- if self._initial_branch_delete_unneeded(lod_items, metadata_db):
- cvs_revision = lod_items.cvs_revisions[0]
- Log().debug(
- 'Removing unnecessary initial branch delete %s' % (cvs_revision,)
- )
- cvs_branch = lod_items.cvs_branch
- cvs_rev_source = self[cvs_branch.source_id]
- cvs_rev_next = lod_items.cvs_revisions[1]
-
- # Delete cvs_revision:
- del self[cvs_revision.id]
- cvs_rev_next.prev_id = None
- self.root_ids.add(cvs_rev_next.id)
- cvs_rev_source.branch_commit_ids.remove(cvs_revision.id)
-
- # Delete the CVSBranch on which it is located:
- del self[cvs_branch.id]
- cvs_rev_source.branch_ids.remove(cvs_branch.id)
-
- def _exclude_tag(self, cvs_tag):
- """Exclude the specified CVS_TAG."""
-
- del self[cvs_tag.id]
-
- # A CVSTag is the successor of the CVSRevision that it
- # sprouts from. Delete this tag from that revision's
- # tag_ids:
- self[cvs_tag.source_id].tag_ids.remove(cvs_tag.id)
-
- def _exclude_branch(self, lod_items):
- """Exclude the branch described by LOD_ITEMS, including its revisions.
-
- (Do not update the LOD_ITEMS instance itself.)
-
- If the LOD starts with non-trunk default branch revisions, leave
- the branch and the NTDB revisions in place, but delete any
- subsequent revisions that are not NTDB revisions. In this case,
- return True; otherwise return False"""
-
- if lod_items.cvs_revisions and lod_items.cvs_revisions[0].ntdbr:
- for cvs_rev in lod_items.cvs_revisions:
- if not cvs_rev.ntdbr:
- # We've found the first non-NTDBR, and it's stored in cvs_rev:
- break
- else:
- # There was no revision following the NTDBRs:
- cvs_rev = None
-
- if cvs_rev:
- last_ntdbr = self[cvs_rev.prev_id]
- last_ntdbr.next_id = None
- while True:
- del self[cvs_rev.id]
- if cvs_rev.next_id is None:
- break
- cvs_rev = self[cvs_rev.next_id]
-
- return True
-
- else:
- if lod_items.cvs_branch is not None:
- # Delete the CVSBranch itself:
- cvs_branch = lod_items.cvs_branch
-
- del self[cvs_branch.id]
-
- # A CVSBranch is the successor of the CVSRevision that it
- # sprouts from. Delete this branch from that revision's
- # branch_ids:
- self[cvs_branch.source_id].branch_ids.remove(cvs_branch.id)
-
- if lod_items.cvs_revisions:
- # The first CVSRevision on the branch has to be either detached
- # from the revision from which the branch sprang, or removed
- # from self.root_ids:
- cvs_rev = lod_items.cvs_revisions[0]
- if cvs_rev.prev_id is None:
- self.root_ids.remove(cvs_rev.id)
- else:
- self[cvs_rev.prev_id].branch_commit_ids.remove(cvs_rev.id)
-
- for cvs_rev in lod_items.cvs_revisions:
- del self[cvs_rev.id]
-
- return False
-
- def graft_ntdbr_to_trunk(self):
- """Graft the non-trunk default branch revisions to trunk.
-
- They should already be alone on a branch that may or may not have
- a CVSBranch connecting it to trunk."""
-
- for lod_items in self.iter_lods():
- if lod_items.cvs_revisions and lod_items.cvs_revisions[0].ntdbr:
- assert lod_items.is_pure_ntdb()
-
- first_rev = lod_items.cvs_revisions[0]
- last_rev = lod_items.cvs_revisions[-1]
- rev_1_1 = self.get(first_rev.prev_id)
- rev_1_2 = self.get(last_rev.ntdbr_next_id)
-
- if lod_items.cvs_branch is not None:
- self._sever_branch(lod_items)
-
- if rev_1_1 is not None:
- rev_1_1.next_id = first_rev.id
- first_rev.prev_id = rev_1_1.id
-
- self.root_ids.remove(first_rev.id)
-
- first_rev.__class__ = cvs_revision_type_map[(
- isinstance(first_rev, CVSRevisionModification),
- isinstance(rev_1_1, CVSRevisionModification),
- )]
-
- if rev_1_2 is not None:
- rev_1_2.ntdbr_prev_id = None
- last_rev.ntdbr_next_id = None
-
- if rev_1_2.prev_id is None:
- self.root_ids.remove(rev_1_2.id)
-
- rev_1_2.prev_id = last_rev.id
- last_rev.next_id = rev_1_2.id
-
- # The effective_pred_id of rev_1_2 was not changed, so we
- # don't have to change rev_1_2's type.
-
- for cvs_rev in lod_items.cvs_revisions:
- cvs_rev.ntdbr = False
- cvs_rev.lod = self.trunk
-
- for cvs_branch in lod_items.cvs_branches:
- cvs_branch.source_lod = self.trunk
-
- for cvs_tag in lod_items.cvs_tags:
- cvs_tag.source_lod = self.trunk
-
- return
-
- def exclude_non_trunk(self):
- """Delete all tags and branches."""
-
- ntdbr_excluded = False
- for lod_items in self.iter_lods():
- for cvs_tag in lod_items.cvs_tags[:]:
- self._exclude_tag(cvs_tag)
- lod_items.cvs_tags.remove(cvs_tag)
-
- if not isinstance(lod_items.lod, Trunk):
- assert not lod_items.cvs_branches
-
- ntdbr_excluded |= self._exclude_branch(lod_items)
-
- if ntdbr_excluded:
- self.graft_ntdbr_to_trunk()
-
- def filter_excluded_symbols(self, revision_excluder):
- """Delete any excluded symbols and references to them.
-
- Call the revision_excluder's callback methods to let it know what
- is being excluded."""
-
- ntdbr_excluded = False
- for lod_items in self.iter_lods():
- # Delete any excluded tags:
- for cvs_tag in lod_items.cvs_tags[:]:
- if isinstance(cvs_tag.symbol, ExcludedSymbol):
- self._exclude_tag(cvs_tag)
-
- lod_items.cvs_tags.remove(cvs_tag)
-
- # Delete the whole branch if it is to be excluded:
- if isinstance(lod_items.lod, ExcludedSymbol):
- # A symbol can only be excluded if no other symbols spring
- # from it. This was already checked in CollateSymbolsPass, so
- # these conditions should already be satisfied.
- assert not list(lod_items.iter_blockers())
-
- ntdbr_excluded |= self._exclude_branch(lod_items)
-
- if ntdbr_excluded:
- self.graft_ntdbr_to_trunk()
-
- revision_excluder.process_file(self)
-
- def _mutate_branch_to_tag(self, cvs_branch):
- """Mutate the branch CVS_BRANCH into a tag."""
-
- if cvs_branch.next_id is not None:
- # This shouldn't happen because it was checked in
- # CollateSymbolsPass:
- raise FatalError('Attempt to exclude a branch with commits.')
- cvs_tag = CVSTag(
- cvs_branch.id, cvs_branch.cvs_file, cvs_branch.symbol,
- cvs_branch.source_lod, cvs_branch.source_id,
- cvs_branch.revision_recorder_token,
- )
- self.add(cvs_tag)
- cvs_revision = self[cvs_tag.source_id]
- cvs_revision.branch_ids.remove(cvs_tag.id)
- cvs_revision.tag_ids.append(cvs_tag.id)
-
- def _mutate_tag_to_branch(self, cvs_tag):
- """Mutate the tag into a branch."""
-
- cvs_branch = CVSBranch(
- cvs_tag.id, cvs_tag.cvs_file, cvs_tag.symbol,
- None, cvs_tag.source_lod, cvs_tag.source_id, None,
- cvs_tag.revision_recorder_token,
- )
- self.add(cvs_branch)
- cvs_revision = self[cvs_branch.source_id]
- cvs_revision.tag_ids.remove(cvs_branch.id)
- cvs_revision.branch_ids.append(cvs_branch.id)
-
- def _mutate_symbol(self, cvs_symbol):
- """Mutate CVS_SYMBOL if necessary."""
-
- symbol = cvs_symbol.symbol
- if isinstance(cvs_symbol, CVSBranch) and isinstance(symbol, Tag):
- self._mutate_branch_to_tag(cvs_symbol)
- elif isinstance(cvs_symbol, CVSTag) and isinstance(symbol, Branch):
- self._mutate_tag_to_branch(cvs_symbol)
-
- def mutate_symbols(self):
- """Force symbols to be tags/branches based on self.symbol_db."""
-
- for cvs_item in self.values():
- if isinstance(cvs_item, CVSRevision):
- # This CVSRevision may be affected by the mutation of any
- # CVSSymbols that it references, but there is nothing to do
- # here directly.
- pass
- elif isinstance(cvs_item, CVSSymbol):
- self._mutate_symbol(cvs_item)
- else:
- raise RuntimeError('Unknown cvs item type')
-
- def _adjust_tag_parent(self, cvs_tag):
- """Adjust the parent of CVS_TAG if possible and preferred.
-
- CVS_TAG is an instance of CVSTag. This method must be called in
- leaf-to-trunk order."""
-
- # The Symbol that cvs_tag would like to have as a parent:
- preferred_parent = Ctx()._symbol_db.get_symbol(
- cvs_tag.symbol.preferred_parent_id)
-
- if cvs_tag.source_lod == preferred_parent:
- # The preferred parent is already the parent.
- return
-
- # The CVSRevision that is its direct parent:
- source = self[cvs_tag.source_id]
- assert isinstance(source, CVSRevision)
-
- if isinstance(preferred_parent, Trunk):
- # It is not possible to graft *onto* Trunk:
- return
-
- # Try to find the preferred parent among the possible parents:
- for branch_id in source.branch_ids:
- if self[branch_id].symbol == preferred_parent:
- # We found it!
- break
- else:
- # The preferred parent is not a possible parent in this file.
- return
-
- parent = self[branch_id]
- assert isinstance(parent, CVSBranch)
-
- Log().debug('Grafting %s from %s (on %s) onto %s' % (
- cvs_tag, source, source.lod, parent,))
- # Switch parent:
- source.tag_ids.remove(cvs_tag.id)
- parent.tag_ids.append(cvs_tag.id)
- cvs_tag.source_lod = parent.symbol
- cvs_tag.source_id = parent.id
-
- def _adjust_branch_parents(self, cvs_branch):
- """Adjust the parent of CVS_BRANCH if possible and preferred.
-
- CVS_BRANCH is an instance of CVSBranch. This method must be
- called in leaf-to-trunk order."""
-
- # The Symbol that cvs_branch would like to have as a parent:
- preferred_parent = Ctx()._symbol_db.get_symbol(
- cvs_branch.symbol.preferred_parent_id)
-
- if cvs_branch.source_lod == preferred_parent:
- # The preferred parent is already the parent.
- return
-
- # The CVSRevision that is its direct parent:
- source = self[cvs_branch.source_id]
- # This is always a CVSRevision because we haven't adjusted it yet:
- assert isinstance(source, CVSRevision)
-
- if isinstance(preferred_parent, Trunk):
- # It is not possible to graft *onto* Trunk:
- return
-
- # Try to find the preferred parent among the possible parents:
- for branch_id in source.branch_ids:
- possible_parent = self[branch_id]
- if possible_parent.symbol == preferred_parent:
- # We found it!
- break
- elif possible_parent.symbol == cvs_branch.symbol:
- # Only branches that precede the branch to be adjusted are
- # considered possible parents. Leave parentage unchanged:
- return
- else:
- # This point should never be reached.
- raise InternalError(
- 'Possible parent search did not terminate as expected')
-
- parent = possible_parent
- assert isinstance(parent, CVSBranch)
-
- Log().debug('Grafting %s from %s (on %s) onto %s' % (
- cvs_branch, source, source.lod, parent,))
- # Switch parent:
- source.branch_ids.remove(cvs_branch.id)
- parent.branch_ids.append(cvs_branch.id)
- cvs_branch.source_lod = parent.symbol
- cvs_branch.source_id = parent.id
-
- def adjust_parents(self):
- """Adjust the parents of symbols to their preferred parents.
-
- If a CVSSymbol has a preferred parent that is different than its
- current parent, and if the preferred parent is an allowed parent
- of the CVSSymbol in this file, then graft the CVSSymbol onto its
- preferred parent."""
-
- for lod_items in self.iter_lods():
- for cvs_tag in lod_items.cvs_tags:
- self._adjust_tag_parent(cvs_tag)
-
- for cvs_branch in lod_items.cvs_branches:
- self._adjust_branch_parents(cvs_branch)
-
- def _get_revision_source(self, cvs_symbol):
- """Return the CVSRevision that is the ultimate source of CVS_SYMBOL."""
-
- while True:
- cvs_item = self[cvs_symbol.source_id]
- if isinstance(cvs_item, CVSRevision):
- return cvs_item
- else:
- cvs_symbol = cvs_item
-
- def refine_symbols(self):
- """Refine the types of the CVSSymbols in this file.
-
- Adjust the symbol types based on whether the source exists:
- CVSBranch vs. CVSBranchNoop and CVSTag vs. CVSTagNoop."""
-
- for lod_items in self.iter_lods():
- for cvs_tag in lod_items.cvs_tags:
- source = self._get_revision_source(cvs_tag)
- cvs_tag.__class__ = cvs_tag_type_map[
- isinstance(source, CVSRevisionModification)
- ]
-
- for cvs_branch in lod_items.cvs_branches:
- source = self._get_revision_source(cvs_branch)
- cvs_branch.__class__ = cvs_branch_type_map[
- isinstance(source, CVSRevisionModification)
- ]
-
- def record_opened_symbols(self):
- """Set CVSRevision.opened_symbols for the surviving revisions."""
-
- for cvs_item in self.values():
- if isinstance(cvs_item, (CVSRevision, CVSBranch)):
- cvs_item.opened_symbols = []
- for cvs_symbol_opened_id in cvs_item.get_cvs_symbol_ids_opened():
- cvs_symbol_opened = self[cvs_symbol_opened_id]
- cvs_item.opened_symbols.append(
- (cvs_symbol_opened.symbol.id, cvs_symbol_opened.id,)
- )
-
- def record_closed_symbols(self):
- """Set CVSRevision.closed_symbols for the surviving revisions.
-
- A CVSRevision closes the symbols that were opened by the CVSItems
- that the CVSRevision closes. Got it?
-
- This method must be called after record_opened_symbols()."""
-
- for cvs_item in self.values():
- if isinstance(cvs_item, CVSRevision):
- cvs_item.closed_symbols = []
- for cvs_item_closed_id in cvs_item.get_ids_closed():
- cvs_item_closed = self[cvs_item_closed_id]
- cvs_item.closed_symbols.extend(cvs_item_closed.opened_symbols)
-
-
diff --git a/cvs2svn_lib/cvs_item.py b/cvs2svn_lib/cvs_item.py
deleted file mode 100644
index 5c01a24..0000000
--- a/cvs2svn_lib/cvs_item.py
+++ /dev/null
@@ -1,901 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains classes to store atomic CVS events.
-
-A CVSItem is a single event, pertaining to a single file, that can be
-determined to have occured based on the information in the CVS
-repository.
-
-The inheritance tree is as follows:
-
-CVSItem
-|
-+--CVSRevision
-| |
-| +--CVSRevisionModification (* -> 'Exp')
-| | |
-| | +--CVSRevisionAdd ('dead' -> 'Exp')
-| | |
-| | +--CVSRevisionChange ('Exp' -> 'Exp')
-| |
-| +--CVSRevisionAbsent (* -> 'dead')
-| |
-| +--CVSRevisionDelete ('Exp' -> 'dead')
-| |
-| +--CVSRevisionNoop ('dead' -> 'dead')
-|
-+--CVSSymbol
- |
- +--CVSBranch
- | |
- | +--CVSBranchNoop
- |
- +--CVSTag
- |
- +--CVSTagNoop
-
-"""
-
-
-from cvs2svn_lib.context import Ctx
-
-
-class CVSItem(object):
- __slots__ = [
- 'id',
- 'cvs_file',
- 'revision_recorder_token',
- ]
-
- def __init__(self, id, cvs_file, revision_recorder_token):
- self.id = id
- self.cvs_file = cvs_file
- self.revision_recorder_token = revision_recorder_token
-
- def __eq__(self, other):
- return self.id == other.id
-
- def __cmp__(self, other):
- return cmp(self.id, other.id)
-
- def __hash__(self):
- return self.id
-
- def __getstate__(self):
- raise NotImplementedError()
-
- def __setstate__(self, data):
- raise NotImplementedError()
-
- def get_svn_path(self):
- """Return the SVN path associated with this CVSItem."""
-
- raise NotImplementedError()
-
- def get_pred_ids(self):
- """Return the CVSItem.ids of direct predecessors of SELF.
-
- A predecessor is defined to be a CVSItem that has to have been
- committed before this one."""
-
- raise NotImplementedError()
-
- def get_succ_ids(self):
- """Return the CVSItem.ids of direct successors of SELF.
-
- A direct successor is defined to be a CVSItem that has this one as
- a direct predecessor."""
-
- raise NotImplementedError()
-
- def get_cvs_symbol_ids_opened(self):
- """Return an iterable over the ids of CVSSymbols that this item opens.
-
- The definition of 'open' is that the path corresponding to this
- CVSItem will have to be copied when filling the corresponding
- symbol."""
-
- raise NotImplementedError()
-
- def get_ids_closed(self):
- """Return an iterable over the CVSItem.ids of CVSItems closed by this one.
-
- A CVSItem A is said to close a CVSItem B if committing A causes B
- to be overwritten or deleted (no longer available) in the SVN
- repository. This is interesting because it sets the last SVN
- revision number from which the contents of B can be copied (for
- example, to fill a symbol). See the concrete implementations of
- this method for the exact rules about what closes what."""
-
- raise NotImplementedError()
-
- def check_links(self, cvs_file_items):
- """Check for consistency of links to other CVSItems.
-
- Other items can be looked up in CVS_FILE_ITEMS, which is an
- instance of CVSFileItems. Raise an AssertionError if there is a
- problem."""
-
- raise NotImplementedError()
-
- def __repr__(self):
- return '%s(%s)' % (self.__class__.__name__, self,)
-
-
-class CVSRevision(CVSItem):
- """Information about a single CVS revision.
-
- A CVSRevision holds the information known about a single version of
- a single file.
-
- Members:
-
- id -- (int) unique ID for this revision.
-
- cvs_file -- (CVSFile) CVSFile affected by this revision.
-
- timestamp -- (int) date stamp for this revision.
-
- metadata_id -- (int) id of metadata instance record in
- metadata_db.
-
- prev_id -- (int) id of the logically previous CVSRevision, either
- on the same or the source branch (or None).
-
- next_id -- (int) id of the logically next CVSRevision (or None).
-
- rev -- (string) the CVS revision number, e.g., '1.3'.
-
- deltatext_exists -- (bool) true iff this revision's deltatext is
- not empty.
-
- lod -- (LineOfDevelopment) LOD on which this revision occurred.
-
- first_on_branch_id -- (int or None) if this revision is the first
- on its branch, the cvs_branch_id of that branch; else, None.
-
- ntdbr -- (bool) true iff this is a non-trunk default branch
- revision.
-
- ntdbr_prev_id -- (int or None) Iff this is the 1.2 revision after
- the end of a default branch, the id of the last rev on the
- default branch; else, None.
-
- ntdbr_next_id -- (int or None) Iff this is the last revision on a
- default branch preceding a 1.2 rev, the id of the 1.2
- revision; else, None.
-
- tag_ids -- (list of int) ids of all CVSTags rooted at this
- CVSRevision.
-
- branch_ids -- (list of int) ids of all CVSBranches rooted at this
- CVSRevision.
-
- branch_commit_ids -- (list of int) ids of first CVSRevision
- committed on each branch rooted in this revision (for branches
- with commits).
-
- opened_symbols -- (None or list of (symbol_id, cvs_symbol_id)
- tuples) information about all CVSSymbols opened by this
- revision. This member is set in FilterSymbolsPass; before
- then, it is None.
-
- closed_symbols -- (None or list of (symbol_id, cvs_symbol_id)
- tuples) information about all CVSSymbols closed by this
- revision. This member is set in FilterSymbolsPass; before
- then, it is None.
-
- revision_recorder_token -- (arbitrary) a token that can be set by
- RevisionRecorder for the later use of RevisionReader.
-
- """
-
- __slots__ = [
- 'timestamp',
- 'metadata_id',
- 'prev_id',
- 'next_id',
- 'rev',
- 'deltatext_exists',
- 'lod',
- 'first_on_branch_id',
- 'ntdbr',
- 'ntdbr_prev_id',
- 'ntdbr_next_id',
- 'tag_ids',
- 'branch_ids',
- 'branch_commit_ids',
- 'opened_symbols',
- 'closed_symbols',
- ]
-
- def __init__(self,
- id, cvs_file,
- timestamp, metadata_id,
- prev_id, next_id,
- rev, deltatext_exists,
- lod, first_on_branch_id, ntdbr,
- ntdbr_prev_id, ntdbr_next_id,
- tag_ids, branch_ids, branch_commit_ids,
- revision_recorder_token):
- """Initialize a new CVSRevision object."""
-
- CVSItem.__init__(self, id, cvs_file, revision_recorder_token)
-
- self.timestamp = timestamp
- self.metadata_id = metadata_id
- self.prev_id = prev_id
- self.next_id = next_id
- self.rev = rev
- self.deltatext_exists = deltatext_exists
- self.lod = lod
- self.first_on_branch_id = first_on_branch_id
- self.ntdbr = ntdbr
- self.ntdbr_prev_id = ntdbr_prev_id
- self.ntdbr_next_id = ntdbr_next_id
- self.tag_ids = tag_ids
- self.branch_ids = branch_ids
- self.branch_commit_ids = branch_commit_ids
- self.opened_symbols = None
- self.closed_symbols = None
-
- def _get_cvs_path(self):
- return self.cvs_file.cvs_path
-
- cvs_path = property(_get_cvs_path)
-
- def get_svn_path(self):
- return self.lod.get_path(self.cvs_file.cvs_path)
-
- def __getstate__(self):
- """Return the contents of this instance, for pickling.
-
- The presence of this method improves the space efficiency of
- pickling CVSRevision instances."""
-
- return (
- self.id, self.cvs_file.id,
- self.timestamp, self.metadata_id,
- self.prev_id, self.next_id,
- self.rev,
- self.deltatext_exists,
- self.lod.id,
- self.first_on_branch_id,
- self.ntdbr,
- self.ntdbr_prev_id, self.ntdbr_next_id,
- self.tag_ids, self.branch_ids, self.branch_commit_ids,
- self.opened_symbols, self.closed_symbols,
- self.revision_recorder_token,
- )
-
- def __setstate__(self, data):
- (self.id, cvs_file_id,
- self.timestamp, self.metadata_id,
- self.prev_id, self.next_id,
- self.rev,
- self.deltatext_exists,
- lod_id,
- self.first_on_branch_id,
- self.ntdbr,
- self.ntdbr_prev_id, self.ntdbr_next_id,
- self.tag_ids, self.branch_ids, self.branch_commit_ids,
- self.opened_symbols, self.closed_symbols,
- self.revision_recorder_token) = data
- self.cvs_file = Ctx()._cvs_file_db.get_file(cvs_file_id)
- self.lod = Ctx()._symbol_db.get_symbol(lod_id)
-
- def get_effective_prev_id(self):
- """Return the ID of the effective predecessor of this item.
-
- This is the ID of the item that determines whether the object
- existed before this CVSRevision."""
-
- if self.ntdbr_prev_id is not None:
- return self.ntdbr_prev_id
- else:
- return self.prev_id
-
- def get_symbol_pred_ids(self):
- """Return the pred_ids for symbol predecessors."""
-
- retval = set()
- if self.first_on_branch_id is not None:
- retval.add(self.first_on_branch_id)
- return retval
-
- def get_pred_ids(self):
- retval = self.get_symbol_pred_ids()
- if self.prev_id is not None:
- retval.add(self.prev_id)
- if self.ntdbr_prev_id is not None:
- retval.add(self.ntdbr_prev_id)
- return retval
-
- def get_symbol_succ_ids(self):
- """Return the succ_ids for symbol successors."""
-
- retval = set()
- for id in self.branch_ids + self.tag_ids:
- retval.add(id)
- return retval
-
- def get_succ_ids(self):
- retval = self.get_symbol_succ_ids()
- if self.next_id is not None:
- retval.add(self.next_id)
- if self.ntdbr_next_id is not None:
- retval.add(self.ntdbr_next_id)
- for id in self.branch_commit_ids:
- retval.add(id)
- return retval
-
- def get_ids_closed(self):
- # Special handling is needed in the case of non-trunk default
- # branches. The following cases have to be handled:
- #
- # Case 1: Revision 1.1 not deleted; revision 1.2 exists:
- #
- # 1.1 -----------------> 1.2
- # \ ^ ^ /
- # \ | | /
- # 1.1.1.1 -> 1.1.1.2
- #
- # * 1.1.1.1 closes 1.1 (because its post-commit overwrites 1.1
- # on trunk)
- #
- # * 1.1.1.2 closes 1.1.1.1
- #
- # * 1.2 doesn't close anything (the post-commit from 1.1.1.1
- # already closed 1.1, and no symbols can sprout from the
- # post-commit of 1.1.1.2)
- #
- # Case 2: Revision 1.1 not deleted; revision 1.2 does not exist:
- #
- # 1.1 ..................
- # \ ^ ^
- # \ | |
- # 1.1.1.1 -> 1.1.1.2
- #
- # * 1.1.1.1 closes 1.1 (because its post-commit overwrites 1.1
- # on trunk)
- #
- # * 1.1.1.2 closes 1.1.1.1
- #
- # Case 3: Revision 1.1 deleted; revision 1.2 exists:
- #
- # ............... 1.2
- # ^ ^ /
- # | | /
- # 1.1.1.1 -> 1.1.1.2
- #
- # * 1.1.1.1 doesn't close anything
- #
- # * 1.1.1.2 closes 1.1.1.1
- #
- # * 1.2 doesn't close anything (no symbols can sprout from the
- # post-commit of 1.1.1.2)
- #
- # Case 4: Revision 1.1 deleted; revision 1.2 doesn't exist:
- #
- # ...............
- # ^ ^
- # | |
- # 1.1.1.1 -> 1.1.1.2
- #
- # * 1.1.1.1 doesn't close anything
- #
- # * 1.1.1.2 closes 1.1.1.1
-
- if self.first_on_branch_id is not None:
- # The first CVSRevision on a branch is considered to close the
- # branch:
- yield self.first_on_branch_id
- if self.ntdbr:
- # If the 1.1 revision was not deleted, the 1.1.1.1 revision is
- # considered to close it:
- yield self.prev_id
- elif self.ntdbr_prev_id is not None:
- # This is the special case of a 1.2 revision that follows a
- # non-trunk default branch. Either 1.1 was deleted or the first
- # default branch revision closed 1.1, so we don't have to close
- # 1.1. Technically, we close the revision on trunk that was
- # copied from the last non-trunk default branch revision in a
- # post-commit, but for now no symbols can sprout from that
- # revision so we ignore that one, too.
- pass
- elif self.prev_id is not None:
- # Since this CVSRevision is not the first on a branch, its
- # prev_id is on the same LOD and this item closes that one:
- yield self.prev_id
-
- def _get_branch_ids_recursively(self, cvs_file_items):
- """Return the set of all CVSBranches that sprout from this CVSRevision.
-
- After parent adjustment in FilterSymbolsPass, it is possible for
- branches to sprout directly from a CVSRevision, or from those
- branches, etc. Return all branches that sprout from this
- CVSRevision, directly or indirectly."""
-
- retval = set()
- branch_ids_to_process = list(self.branch_ids)
- while branch_ids_to_process:
- branch = cvs_file_items[branch_ids_to_process.pop()]
- retval.add(branch)
- branch_ids_to_process.extend(branch.branch_ids)
-
- return retval
-
- def check_links(self, cvs_file_items):
- assert self.cvs_file == cvs_file_items.cvs_file
-
- prev = cvs_file_items.get(self.prev_id)
- next = cvs_file_items.get(self.next_id)
- first_on_branch = cvs_file_items.get(self.first_on_branch_id)
- ntdbr_next = cvs_file_items.get(self.ntdbr_next_id)
- ntdbr_prev = cvs_file_items.get(self.ntdbr_prev_id)
- effective_prev = cvs_file_items.get(self.get_effective_prev_id())
-
- if prev is None:
- # This is the first CVSRevision on trunk or a detached branch:
- assert self.id in cvs_file_items.root_ids
- elif first_on_branch is not None:
- # This is the first CVSRevision on an existing branch:
- assert isinstance(first_on_branch, CVSBranch)
- assert first_on_branch.symbol == self.lod
- assert first_on_branch.next_id == self.id
- cvs_revision_source = first_on_branch.get_cvs_revision_source(
- cvs_file_items
- )
- assert cvs_revision_source.id == prev.id
- assert self.id in prev.branch_commit_ids
- else:
- # This revision follows another revision on the same LOD:
- assert prev.next_id == self.id
- assert prev.lod == self.lod
-
- if next is not None:
- assert next.prev_id == self.id
- assert next.lod == self.lod
-
- if ntdbr_next is not None:
- assert self.ntdbr
- assert ntdbr_next.ntdbr_prev_id == self.id
-
- if ntdbr_prev is not None:
- assert ntdbr_prev.ntdbr_next_id == self.id
-
- for tag_id in self.tag_ids:
- tag = cvs_file_items[tag_id]
- assert isinstance(tag, CVSTag)
- assert tag.source_id == self.id
- assert tag.source_lod == self.lod
-
- for branch_id in self.branch_ids:
- branch = cvs_file_items[branch_id]
- assert isinstance(branch, CVSBranch)
- assert branch.source_id == self.id
- assert branch.source_lod == self.lod
-
- branch_commit_ids = list(self.branch_commit_ids)
-
- for branch in self._get_branch_ids_recursively(cvs_file_items):
- assert isinstance(branch, CVSBranch)
- if branch.next_id is not None:
- assert branch.next_id in branch_commit_ids
- branch_commit_ids.remove(branch.next_id)
-
- assert not branch_commit_ids
-
- assert self.__class__ == cvs_revision_type_map[(
- isinstance(self, CVSRevisionModification),
- effective_prev is not None
- and isinstance(effective_prev, CVSRevisionModification),
- )]
-
- def __str__(self):
- """For convenience only. The format is subject to change at any time."""
-
- return '%s:%s<%x>' % (self.cvs_file, self.rev, self.id,)
-
-
-class CVSRevisionModification(CVSRevision):
- """Base class for CVSRevisionAdd or CVSRevisionChange."""
-
- __slots__ = []
-
- def get_cvs_symbol_ids_opened(self):
- return self.tag_ids + self.branch_ids
-
-
-class CVSRevisionAdd(CVSRevisionModification):
- """A CVSRevision that creates a file that previously didn't exist.
-
- The file might have never existed on this LOD, or it might have
- existed previously but been deleted by a CVSRevisionDelete."""
-
- __slots__ = []
-
-
-class CVSRevisionChange(CVSRevisionModification):
- """A CVSRevision that modifies a file that already existed on this LOD."""
-
- __slots__ = []
-
-
-class CVSRevisionAbsent(CVSRevision):
- """A CVSRevision for which the file is nonexistent on this LOD."""
-
- __slots__ = []
-
- def get_cvs_symbol_ids_opened(self):
- return []
-
-
-class CVSRevisionDelete(CVSRevisionAbsent):
- """A CVSRevision that deletes a file that existed on this LOD."""
-
- __slots__ = []
-
-
-class CVSRevisionNoop(CVSRevisionAbsent):
- """A CVSRevision that doesn't do anything.
-
- The revision was 'dead' and the predecessor either didn't exist or
- was also 'dead'. These revisions can't necessarily be thrown away
- because (1) they impose ordering constraints on other items; (2)
- they might have a nontrivial log message that we don't want to throw
- away."""
-
- __slots__ = []
-
-
-# A map
-#
-# {(nondead(cvs_rev), nondead(prev_cvs_rev)) : cvs_revision_subtype}
-#
-# , where nondead() means that the cvs revision exists and is not
-# 'dead', and CVS_REVISION_SUBTYPE is the subtype of CVSRevision that
-# should be used for CVS_REV.
-cvs_revision_type_map = {
- (False, False) : CVSRevisionNoop,
- (False, True) : CVSRevisionDelete,
- (True, False) : CVSRevisionAdd,
- (True, True) : CVSRevisionChange,
- }
-
-
-class CVSSymbol(CVSItem):
- """Represent a symbol on a particular CVSFile.
-
- This is the base class for CVSBranch and CVSTag.
-
- Members:
-
- id -- (int) unique ID for this item.
-
- cvs_file -- (CVSFile) CVSFile affected by this item.
-
- symbol -- (Symbol) the symbol affected by this CVSSymbol.
-
- source_lod -- (LineOfDevelopment) the LOD that is the source for
- this CVSSymbol.
-
- source_id -- (int) the ID of the CVSRevision or CVSBranch that is
- the source for this item. This initially points to a
- CVSRevision, but can be changed to a CVSBranch via parent
- adjustment in FilterSymbolsPass.
-
- revision_recorder_token -- (arbitrary) a token that can be set by
- RevisionRecorder for the later use of RevisionReader.
-
- """
-
- __slots__ = [
- 'symbol',
- 'source_lod',
- 'source_id',
- ]
-
- def __init__(
- self, id, cvs_file, symbol, source_lod, source_id,
- revision_recorder_token
- ):
- """Initialize a CVSSymbol object."""
-
- CVSItem.__init__(self, id, cvs_file, revision_recorder_token)
-
- self.symbol = symbol
- self.source_lod = source_lod
- self.source_id = source_id
-
- def get_cvs_revision_source(self, cvs_file_items):
- """Return the CVSRevision that is the ultimate source of this symbol."""
-
- cvs_source = cvs_file_items[self.source_id]
- while not isinstance(cvs_source, CVSRevision):
- cvs_source = cvs_file_items[cvs_source.source_id]
-
- return cvs_source
-
- def get_svn_path(self):
- return self.symbol.get_path(self.cvs_file.cvs_path)
-
- def get_ids_closed(self):
- # A Symbol does not close any other CVSItems:
- return []
-
-
-class CVSBranch(CVSSymbol):
- """Represent the creation of a branch in a particular CVSFile.
-
- Members:
-
- id -- (int) unique ID for this item.
-
- cvs_file -- (CVSFile) CVSFile affected by this item.
-
- symbol -- (Symbol) the symbol affected by this CVSSymbol.
-
- branch_number -- (string) the number of this branch (e.g.,
- '1.3.4'), or None if this is a converted CVSTag.
-
- source_lod -- (LineOfDevelopment) the LOD that is the source for
- this CVSSymbol.
-
- source_id -- (int) id of the CVSRevision or CVSBranch from which
- this branch sprouts. This initially points to a CVSRevision,
- but can be changed to a CVSBranch via parent adjustment in
- FilterSymbolsPass.
-
- next_id -- (int or None) id of first CVSRevision on this branch,
- if any; else, None.
-
- tag_ids -- (list of int) ids of all CVSTags rooted at this
- CVSBranch (can be set due to parent adjustment in
- FilterSymbolsPass).
-
- branch_ids -- (list of int) ids of all CVSBranches rooted at this
- CVSBranch (can be set due to parent adjustment in
- FilterSymbolsPass).
-
- opened_symbols -- (None or list of (symbol_id, cvs_symbol_id)
- tuples) information about all CVSSymbols opened by this
- branch. This member is set in FilterSymbolsPass; before then,
- it is None.
-
- revision_recorder_token -- (arbitrary) a token that can be set by
- RevisionRecorder for the later use of RevisionReader.
-
- """
-
- __slots__ = [
- 'branch_number',
- 'next_id',
- 'tag_ids',
- 'branch_ids',
- 'opened_symbols',
- ]
-
- def __init__(
- self, id, cvs_file, symbol, branch_number,
- source_lod, source_id, next_id,
- revision_recorder_token,
- ):
- """Initialize a CVSBranch."""
-
- CVSSymbol.__init__(
- self, id, cvs_file, symbol, source_lod, source_id,
- revision_recorder_token
- )
- self.branch_number = branch_number
- self.next_id = next_id
- self.tag_ids = []
- self.branch_ids = []
- self.opened_symbols = None
-
- def __getstate__(self):
- return (
- self.id, self.cvs_file.id,
- self.symbol.id, self.branch_number,
- self.source_lod.id, self.source_id, self.next_id,
- self.tag_ids, self.branch_ids,
- self.opened_symbols,
- self.revision_recorder_token,
- )
-
- def __setstate__(self, data):
- (
- self.id, cvs_file_id,
- symbol_id, self.branch_number,
- source_lod_id, self.source_id, self.next_id,
- self.tag_ids, self.branch_ids,
- self.opened_symbols,
- self.revision_recorder_token,
- ) = data
- self.cvs_file = Ctx()._cvs_file_db.get_file(cvs_file_id)
- self.symbol = Ctx()._symbol_db.get_symbol(symbol_id)
- self.source_lod = Ctx()._symbol_db.get_symbol(source_lod_id)
-
- def get_pred_ids(self):
- return set([self.source_id])
-
- def get_succ_ids(self):
- retval = set(self.tag_ids + self.branch_ids)
- if self.next_id is not None:
- retval.add(self.next_id)
- return retval
-
- def get_cvs_symbol_ids_opened(self):
- return self.tag_ids + self.branch_ids
-
- def check_links(self, cvs_file_items):
- source = cvs_file_items.get(self.source_id)
- next = cvs_file_items.get(self.next_id)
-
- assert self.id in source.branch_ids
- if isinstance(source, CVSRevision):
- assert self.source_lod == source.lod
- elif isinstance(source, CVSBranch):
- assert self.source_lod == source.symbol
- else:
- assert False
-
- if next is not None:
- assert isinstance(next, CVSRevision)
- assert next.lod == self.symbol
- assert next.first_on_branch_id == self.id
-
- for tag_id in self.tag_ids:
- tag = cvs_file_items[tag_id]
- assert isinstance(tag, CVSTag)
- assert tag.source_id == self.id
- assert tag.source_lod == self.symbol
-
- for branch_id in self.branch_ids:
- branch = cvs_file_items[branch_id]
- assert isinstance(branch, CVSBranch)
- assert branch.source_id == self.id
- assert branch.source_lod == self.symbol
-
- def __str__(self):
- """For convenience only. The format is subject to change at any time."""
-
- return '%s:%s:%s<%x>' \
- % (self.cvs_file, self.symbol, self.branch_number, self.id,)
-
-
-class CVSBranchNoop(CVSBranch):
- """A CVSBranch whose source is a CVSRevisionAbsent."""
-
- __slots__ = []
-
- def get_cvs_symbol_ids_opened(self):
- return []
-
-
-# A map
-#
-# {nondead(source_cvs_rev) : cvs_branch_subtype}
-#
-# , where nondead() means that the cvs revision exists and is not
-# 'dead', and CVS_BRANCH_SUBTYPE is the subtype of CVSBranch that
-# should be used.
-cvs_branch_type_map = {
- False : CVSBranchNoop,
- True : CVSBranch,
- }
-
-
-class CVSTag(CVSSymbol):
- """Represent the creation of a tag on a particular CVSFile.
-
- Members:
-
- id -- (int) unique ID for this item.
-
- cvs_file -- (CVSFile) CVSFile affected by this item.
-
- symbol -- (Symbol) the symbol affected by this CVSSymbol.
-
- source_lod -- (LineOfDevelopment) the LOD that is the source for
- this CVSSymbol.
-
- source_id -- (int) the ID of the CVSRevision or CVSBranch that is
- being tagged. This initially points to a CVSRevision, but can
- be changed to a CVSBranch via parent adjustment in
- FilterSymbolsPass.
-
- revision_recorder_token -- (arbitrary) a token that can be set by
- RevisionRecorder for the later use of RevisionReader.
-
- """
-
- __slots__ = []
-
- def __init__(
- self, id, cvs_file, symbol, source_lod, source_id,
- revision_recorder_token,
- ):
- """Initialize a CVSTag."""
-
- CVSSymbol.__init__(
- self, id, cvs_file, symbol, source_lod, source_id,
- revision_recorder_token,
- )
-
- def __getstate__(self):
- return (
- self.id, self.cvs_file.id, self.symbol.id,
- self.source_lod.id, self.source_id,
- self.revision_recorder_token,
- )
-
- def __setstate__(self, data):
- (
- self.id, cvs_file_id, symbol_id, source_lod_id, self.source_id,
- self.revision_recorder_token,
- ) = data
- self.cvs_file = Ctx()._cvs_file_db.get_file(cvs_file_id)
- self.symbol = Ctx()._symbol_db.get_symbol(symbol_id)
- self.source_lod = Ctx()._symbol_db.get_symbol(source_lod_id)
-
- def get_pred_ids(self):
- return set([self.source_id])
-
- def get_succ_ids(self):
- return set()
-
- def get_cvs_symbol_ids_opened(self):
- return []
-
- def check_links(self, cvs_file_items):
- source = cvs_file_items.get(self.source_id)
-
- assert self.id in source.tag_ids
- if isinstance(source, CVSRevision):
- assert self.source_lod == source.lod
- elif isinstance(source, CVSBranch):
- assert self.source_lod == source.symbol
- else:
- assert False
-
- def __str__(self):
- """For convenience only. The format is subject to change at any time."""
-
- return '%s:%s<%x>' \
- % (self.cvs_file, self.symbol, self.id,)
-
-
-class CVSTagNoop(CVSTag):
- """A CVSTag whose source is a CVSRevisionAbsent."""
-
- __slots__ = []
-
-
-# A map
-#
-# {nondead(source_cvs_rev) : cvs_tag_subtype}
-#
-# , where nondead() means that the cvs revision exists and is not
-# 'dead', and CVS_TAG_SUBTYPE is the subtype of CVSTag that should be
-# used.
-cvs_tag_type_map = {
- False : CVSTagNoop,
- True : CVSTag,
- }
-
-
diff --git a/cvs2svn_lib/cvs_item_database.py b/cvs2svn_lib/cvs_item_database.py
deleted file mode 100644
index f072252..0000000
--- a/cvs2svn_lib/cvs_item_database.py
+++ /dev/null
@@ -1,248 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains a database that can store arbitrary CVSItems."""
-
-
-import re
-import cPickle
-
-from cvs2svn_lib.cvs_item import CVSRevisionAdd
-from cvs2svn_lib.cvs_item import CVSRevisionChange
-from cvs2svn_lib.cvs_item import CVSRevisionDelete
-from cvs2svn_lib.cvs_item import CVSRevisionNoop
-from cvs2svn_lib.cvs_item import CVSBranch
-from cvs2svn_lib.cvs_item import CVSBranchNoop
-from cvs2svn_lib.cvs_item import CVSTag
-from cvs2svn_lib.cvs_item import CVSTagNoop
-from cvs2svn_lib.cvs_file_items import CVSFileItems
-from cvs2svn_lib.serializer import Serializer
-from cvs2svn_lib.serializer import PrimedPickleSerializer
-from cvs2svn_lib.database import IndexedStore
-
-
-cvs_item_primer = (
- CVSRevisionAdd, CVSRevisionChange,
- CVSRevisionDelete, CVSRevisionNoop,
- CVSBranch, CVSBranchNoop,
- CVSTag, CVSTagNoop,
- )
-
-
-class NewCVSItemStore:
- """A file of sequential CVSItems, grouped by CVSFile.
-
- The file consists of a sequence of pickles. The zeroth one is a
- Serializer as described in the serializer module. Subsequent ones
- are pickled lists of CVSItems, each list containing all of the
- CVSItems for a single file.
-
- We don't use a single pickler for all items because the memo would
- grow too large."""
-
- def __init__(self, filename):
- """Initialize an instance, creating the file and writing the primer."""
-
- self.f = open(filename, 'wb')
-
- self.serializer = PrimedPickleSerializer(
- cvs_item_primer + (CVSFileItems,)
- )
- cPickle.dump(self.serializer, self.f, -1)
-
- def add(self, cvs_file_items):
- """Write CVS_FILE_ITEMS into the database."""
-
- self.serializer.dumpf(self.f, cvs_file_items)
-
- def close(self):
- self.f.close()
- self.f = None
-
-
-class OldCVSItemStore:
- """Read a file created by NewCVSItemStore.
-
- The file must be read sequentially, one CVSFileItems instance at a
- time."""
-
- def __init__(self, filename):
- self.f = open(filename, 'rb')
-
- # Read the memo from the first pickle:
- self.serializer = cPickle.load(self.f)
-
- def iter_cvs_file_items(self):
- """Iterate through the CVSFileItems instances, one file at a time.
-
- Each time yield a CVSFileItems instance for one CVSFile."""
-
- try:
- while True:
- yield self.serializer.loadf(self.f)
- except EOFError:
- return
-
- def close(self):
- self.f.close()
- self.f = None
-
-
-class LinewiseSerializer(Serializer):
- """A serializer that writes exactly one line for each object.
-
- The actual serialization is done by a wrapped serializer; this class
- only escapes any newlines in the serialized data then appends a
- single newline."""
-
- def __init__(self, wrapee):
- self.wrapee = wrapee
-
- @staticmethod
- def _encode_newlines(s):
- """Return s with newlines and backslashes encoded.
-
- The string is returned with the following character transformations:
-
- LF -> \n
- CR -> \r
- ^Z -> \z (needed for Windows)
- \ -> \\
-
- """
-
- return s.replace('\\', '\\\\') \
- .replace('\n', '\\n') \
- .replace('\r', '\\r') \
- .replace('\x1a', '\\z')
-
- _escape_re = re.compile(r'(\\\\|\\n|\\r|\\z)')
- _subst = {'\\n' : '\n', '\\r' : '\r', '\\z' : '\x1a', '\\\\' : '\\'}
-
- @staticmethod
- def _decode_newlines(s):
- """Return s with newlines and backslashes decoded.
-
- This function reverses the encoding of _encode_newlines().
-
- """
-
- def repl(m):
- return LinewiseSerializer._subst[m.group(1)]
-
- return LinewiseSerializer._escape_re.sub(repl, s)
-
- def dumpf(self, f, object):
- f.write(self.dumps(object))
-
- def dumps(self, object):
- return self._encode_newlines(self.wrapee.dumps(object)) + '\n'
-
- def loadf(self, f):
- return self.loads(f.readline())
-
- def loads(self, s):
- return self.wrapee.loads(self._decode_newlines(s[:-1]))
-
-
-class NewSortableCVSRevisionDatabase(object):
- """A serially-accessible, sortable file for holding CVSRevisions.
-
- This class creates such files."""
-
- def __init__(self, filename, serializer):
- self.f = open(filename, 'w')
- self.serializer = LinewiseSerializer(serializer)
-
- def add(self, cvs_rev):
- self.f.write(
- '%x %08x %s' % (
- cvs_rev.metadata_id, cvs_rev.timestamp,
- self.serializer.dumps(cvs_rev),
- )
- )
-
- def close(self):
- self.f.close()
- self.f = None
-
-
-class OldSortableCVSRevisionDatabase(object):
- """A serially-accessible, sortable file for holding CVSRevisions.
-
- This class reads such files."""
-
- def __init__(self, filename, serializer):
- self.filename = filename
- self.serializer = LinewiseSerializer(serializer)
-
- def __iter__(self):
- f = open(self.filename, 'r')
- for l in f:
- s = l.split(' ', 2)[-1]
- yield self.serializer.loads(s)
- f.close()
-
- def close(self):
- pass
-
-
-class NewSortableCVSSymbolDatabase(object):
- """A serially-accessible, sortable file for holding CVSSymbols.
-
- This class creates such files."""
-
- def __init__(self, filename, serializer):
- self.f = open(filename, 'w')
- self.serializer = LinewiseSerializer(serializer)
-
- def add(self, cvs_symbol):
- self.f.write(
- '%x %s' % (cvs_symbol.symbol.id, self.serializer.dumps(cvs_symbol))
- )
-
- def close(self):
- self.f.close()
- self.f = None
-
-
-class OldSortableCVSSymbolDatabase(object):
- """A serially-accessible, sortable file for holding CVSSymbols.
-
- This class reads such files."""
-
- def __init__(self, filename, serializer):
- self.filename = filename
- self.serializer = LinewiseSerializer(serializer)
-
- def __iter__(self):
- f = open(self.filename, 'r')
- for l in f:
- s = l.split(' ', 1)[-1]
- yield self.serializer.loads(s)
- f.close()
-
- def close(self):
- pass
-
-
-def IndexedCVSItemStore(filename, index_filename, mode):
- return IndexedStore(
- filename, index_filename, mode,
- PrimedPickleSerializer(cvs_item_primer)
- )
-
-
diff --git a/cvs2svn_lib/cvs_revision_manager.py b/cvs2svn_lib/cvs_revision_manager.py
deleted file mode 100644
index 6f5de3b..0000000
--- a/cvs2svn_lib/cvs_revision_manager.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""Access the CVS repository via CVS's 'cvs' command."""
-
-
-from cvs2svn_lib.common import FatalError
-from cvs2svn_lib.process import check_command_runs
-from cvs2svn_lib.process import PipeStream
-from cvs2svn_lib.process import CommandFailedException
-from cvs2svn_lib.revision_manager import RevisionReader
-
-
-class CVSRevisionReader(RevisionReader):
- """A RevisionReader that reads the contents via CVS."""
-
- # Different versions of CVS support different global arguments.
- # Here are the global arguments that we try to use, in order of
- # decreasing preference:
- _possible_global_arguments = [
- ['-q', '-R', '-f'],
- ['-q', '-R'],
- ['-q', '-f'],
- ['-q'],
- ]
-
- def __init__(self, cvs_executable):
- self.cvs_executable = cvs_executable
-
- for global_arguments in self._possible_global_arguments:
- try:
- self._check_cvs_runs(global_arguments)
- except CommandFailedException, e:
- pass
- else:
- # Those global arguments were OK; use them for all CVS invocations.
- self.global_arguments = global_arguments
- break
- else:
- raise FatalError(
- '%s\n'
- 'Please check that cvs is installed and in your PATH.' % (e,)
- )
-
- def _check_cvs_runs(self, global_arguments):
- """Check that CVS can be started.
-
- Try running 'cvs --version' with the current setting for
- self.cvs_executable and the specified global_arguments. If not
- successful, raise a CommandFailedException."""
-
- check_command_runs(
- [self.cvs_executable] + global_arguments + ['--version'],
- self.cvs_executable,
- )
-
- def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False):
- project = cvs_rev.cvs_file.project
- pipe_cmd = [
- self.cvs_executable
- ] + self.global_arguments + [
- '-d', project.cvs_repository_root,
- 'co',
- '-r' + cvs_rev.rev,
- '-p'
- ]
- if suppress_keyword_substitution:
- pipe_cmd.append('-kk')
- pipe_cmd.append(project.cvs_module + cvs_rev.cvs_path)
- return PipeStream(pipe_cmd)
-
-
diff --git a/cvs2svn_lib/database.py b/cvs2svn_lib/database.py
deleted file mode 100644
index 9db9be2..0000000
--- a/cvs2svn_lib/database.py
+++ /dev/null
@@ -1,322 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains database facilities used by cvs2svn."""
-
-
-import sys
-import os
-import cPickle
-
-from cvs2svn_lib.common import DB_OPEN_READ
-from cvs2svn_lib.common import DB_OPEN_WRITE
-from cvs2svn_lib.common import DB_OPEN_NEW
-from cvs2svn_lib.common import warning_prefix
-from cvs2svn_lib.common import error_prefix
-from cvs2svn_lib.log import Log
-from cvs2svn_lib.record_table import FileOffsetPacker
-from cvs2svn_lib.record_table import RecordTable
-
-
-# DBM module selection
-
-# 1. If we have bsddb3, it is probably newer than bsddb. Fake bsddb = bsddb3,
-# so that the dbhash module used by anydbm will use bsddb3.
-try:
- import bsddb3
- sys.modules['bsddb'] = sys.modules['bsddb3']
-except ImportError:
- pass
-
-# 2. These DBM modules are not good for cvs2svn.
-import anydbm
-if anydbm._defaultmod.__name__ in ['dumbdbm', 'dbm']:
- Log().error(
- '%s: cvs2svn uses the anydbm package, which depends on lower level '
- 'dbm\n'
- 'libraries. Your system has %s, with which cvs2svn is known to have\n'
- 'problems. To use cvs2svn, you must install a Python dbm library '
- 'other than\n'
- 'dumbdbm or dbm. See '
- 'http://python.org/doc/current/lib/module-anydbm.html\n'
- 'for more information.\n'
- % (error_prefix, anydbm._defaultmod.__name__,)
- )
- sys.exit(1)
-
-# 3. If we are using the old bsddb185 module, then try prefer gdbm instead.
-# Unfortunately, gdbm appears not to be trouble free, either.
-if hasattr(anydbm._defaultmod, 'bsddb') \
- and not hasattr(anydbm._defaultmod.bsddb, '__version__'):
- try:
- gdbm = __import__('gdbm')
- except ImportError:
- Log().warn(
- '%s: The version of the bsddb module found on your computer '
- 'has been\n'
- 'reported to malfunction on some datasets, causing KeyError '
- 'exceptions.\n'
- % (warning_prefix,)
- )
- else:
- anydbm._defaultmod = gdbm
-
-
-class Database:
- """A database that uses a Serializer to store objects of a certain type.
-
- The serializer is stored in the database under the key
- self.serializer_key. (This implies that self.serializer_key may not
- be used as a key for normal entries.)
-
- The backing database is an anydbm-based DBM.
-
- """
-
- serializer_key = '_.%$1\t;_ '
-
- def __init__(self, filename, mode, serializer=None):
- """Constructor.
-
- The database stores its Serializer, so none needs to be supplied
- when opening an existing database."""
-
- # pybsddb3 has a bug which prevents it from working with
- # Berkeley DB 4.2 if you open the db with 'n' ("new"). This
- # causes the DB_TRUNCATE flag to be passed, which is disallowed
- # for databases protected by lock and transaction support
- # (bsddb databases use locking from bsddb version 4.2.4 onwards).
- #
- # Therefore, manually perform the removal (we can do this, because
- # we know that for bsddb - but *not* anydbm in general - the database
- # consists of one file with the name we specify, rather than several
- # based on that name).
- if mode == DB_OPEN_NEW and anydbm._defaultmod.__name__ == 'dbhash':
- if os.path.isfile(filename):
- os.unlink(filename)
- self.db = anydbm.open(filename, 'c')
- else:
- self.db = anydbm.open(filename, mode)
-
- # Import implementations for many mapping interface methods.
- for meth_name in ('__delitem__',
- '__iter__', 'has_key', '__contains__', 'iterkeys', 'clear'):
- meth_ref = getattr(self.db, meth_name, None)
- if meth_ref:
- setattr(self, meth_name, meth_ref)
-
- if mode == DB_OPEN_NEW:
- self.serializer = serializer
- self.db[self.serializer_key] = cPickle.dumps(self.serializer)
- else:
- self.serializer = cPickle.loads(self.db[self.serializer_key])
-
- def __getitem__(self, key):
- return self.serializer.loads(self.db[key])
-
- def __setitem__(self, key, value):
- self.db[key] = self.serializer.dumps(value)
-
- def __delitem__(self, key):
- # gdbm defines a __delitem__ method, but it cannot be assigned. So
- # this method provides a fallback definition via explicit delegation:
- del self.db[key]
-
- def keys(self):
- retval = self.db.keys()
- retval.remove(self.serializer_key)
- return retval
-
- def __iter__(self):
- for key in self.keys():
- yield key
-
- def has_key(self, key):
- try:
- self.db[key]
- return True
- except KeyError:
- return False
-
- def __contains__(self, key):
- return self.has_key(key)
-
- def iterkeys(self):
- return self.__iter__()
-
- def clear(self):
- for key in self.keys():
- del self[key]
-
- def items(self):
- return [(key, self[key],) for key in self.keys()]
-
- def values(self):
- return [self[key] for key in self.keys()]
-
- def get(self, key, default=None):
- try:
- return self[key]
- except KeyError:
- return default
-
- def close(self):
- self.db.close()
- self.db = None
-
-
-class IndexedDatabase:
- """A file of objects that are written sequentially and read randomly.
-
- The objects are indexed by small non-negative integers, and a
- RecordTable is used to store the index -> fileoffset map.
- fileoffset=0 is used to represent an empty record. (An offset of 0
- cannot occur for a legitimate record because the serializer is
- written there.)
-
- The main file consists of a sequence of pickles (or other serialized
- data format). The zeroth record is a pickled Serializer.
- Subsequent ones are objects serialized using the serializer. The
- offset of each object in the file is stored to an index table so
- that the data can later be retrieved randomly.
-
- Objects are always stored to the end of the file. If an object is
- deleted or overwritten, the fact is recorded in the index_table but
- the space in the pickle file is not garbage collected. This has the
- advantage that one can create a modified version of a database that
- shares the main data file with an old version by copying the index
- file. But it has the disadvantage that space is wasted whenever
- objects are written multiple times."""
-
- def __init__(self, filename, index_filename, mode, serializer=None):
- """Initialize an IndexedDatabase, writing the serializer if necessary.
-
- SERIALIZER is only used if MODE is DB_OPEN_NEW; otherwise the
- serializer is read from the file."""
-
- self.filename = filename
- self.index_filename = index_filename
- self.mode = mode
- if self.mode == DB_OPEN_NEW:
- self.f = open(self.filename, 'wb+')
- elif self.mode == DB_OPEN_WRITE:
- self.f = open(self.filename, 'rb+')
- elif self.mode == DB_OPEN_READ:
- self.f = open(self.filename, 'rb')
- else:
- raise RuntimeError('Invalid mode %r' % self.mode)
-
- self.index_table = RecordTable(
- self.index_filename, self.mode, FileOffsetPacker()
- )
-
- if self.mode == DB_OPEN_NEW:
- assert serializer is not None
- self.serializer = serializer
- cPickle.dump(self.serializer, self.f, -1)
- else:
- # Read the memo from the first pickle:
- self.serializer = cPickle.load(self.f)
-
- # Seek to the end of the file, and record that position:
- self.f.seek(0, 2)
- self.fp = self.f.tell()
- self.eofp = self.fp
-
- def __setitem__(self, index, item):
- """Write ITEM into the database indexed by INDEX."""
-
- # Make sure we're at the end of the file:
- if self.fp != self.eofp:
- self.f.seek(self.eofp)
- self.index_table[index] = self.eofp
- s = self.serializer.dumps(item)
- self.f.write(s)
- self.eofp += len(s)
- self.fp = self.eofp
-
- def _fetch(self, offset):
- if self.fp != offset:
- self.f.seek(offset)
-
- # There is no easy way to tell how much data will be read, so just
- # indicate that we don't know the current file pointer:
- self.fp = None
-
- return self.serializer.loadf(self.f)
-
- def iterkeys(self):
- return self.index_table.iterkeys()
-
- def itervalues(self):
- for offset in self.index_table.itervalues():
- yield self._fetch(offset)
-
- def __getitem__(self, index):
- offset = self.index_table[index]
- return self._fetch(offset)
-
- def get(self, item, default=None):
- try:
- return self[item]
- except KeyError:
- return default
-
- def get_many(self, indexes, default=None):
- """Yield (index,item) tuples for INDEXES, in arbitrary order.
-
- Yield (index,default) for indexes with no defined values."""
-
- offsets = []
- for (index, offset) in self.index_table.get_many(indexes):
- if offset is None:
- yield (index, default)
- else:
- offsets.append((offset, index))
-
- # Sort the offsets to reduce disk seeking:
- offsets.sort()
- for (offset,index) in offsets:
- yield (index, self._fetch(offset))
-
- def __delitem__(self, index):
- # We don't actually free the data in self.f.
- del self.index_table[index]
-
- def close(self):
- self.index_table.close()
- self.index_table = None
- self.f.close()
- self.f = None
-
- def __str__(self):
- return 'IndexedDatabase(%r)' % (self.filename,)
-
-
-class IndexedStore(IndexedDatabase):
- """A file of items that is written sequentially and read randomly.
-
- This is just like IndexedDatabase, except that it has an additional
- add() method which assumes that the object to be written to the
- database has an 'id' member, which is used as its database index.
- See IndexedDatabase for more information."""
-
- def add(self, item):
- """Write ITEM into the database indexed by ITEM.id."""
-
- self[item.id] = item
-
-
diff --git a/cvs2svn_lib/dumpfile_delegate.py b/cvs2svn_lib/dumpfile_delegate.py
deleted file mode 100644
index 092cfca..0000000
--- a/cvs2svn_lib/dumpfile_delegate.py
+++ /dev/null
@@ -1,510 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains database facilities used by cvs2svn."""
-
-
-try:
- from hashlib import md5
-except ImportError:
- from md5 import new as md5
-
-
-from cvs2svn_lib import config
-from cvs2svn_lib.common import FatalError
-from cvs2svn_lib.common import InternalError
-from cvs2svn_lib.common import path_split
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.cvs_file import CVSDirectory
-from cvs2svn_lib.cvs_file import CVSFile
-from cvs2svn_lib.svn_repository_delegate import SVNRepositoryDelegate
-from cvs2svn_lib.apple_single_filter import get_maybe_apple_single_stream
-
-
-# Things that can happen to a file.
-OP_ADD = 'add'
-OP_CHANGE = 'change'
-
-
-class DumpfileDelegate(SVNRepositoryDelegate):
- """Create a Subversion dumpfile."""
-
- def __init__(self, revision_reader, dumpfile_path):
- """Return a new DumpfileDelegate instance, attached to a dumpfile
- DUMPFILE_PATH, using Ctx().cvs_filename_decoder()."""
-
- self._revision_reader = revision_reader
- self.dumpfile_path = dumpfile_path
-
- self.dumpfile = open(self.dumpfile_path, 'wb')
- self._write_dumpfile_header(self.dumpfile)
-
- # A set of the basic project infrastructure project directories
- # that have been created so far, as SVN paths. (The root
- # directory is considered to be present at initialization.) This
- # includes all of the LOD paths, and all of their parent
- # directories etc.
- self._basic_directories = set([''])
-
- def _write_dumpfile_header(self, dumpfile):
- # Initialize the dumpfile with the standard headers.
- #
- # Since the CVS repository doesn't have a UUID, and the Subversion
- # repository will be created with one anyway, we don't specify a
- # UUID in the dumpflie
- dumpfile.write('SVN-fs-dump-format-version: 2\n\n')
-
- def _utf8_path(self, path):
- """Return a copy of PATH encoded in UTF-8."""
-
- # Convert each path component separately (as they may each use
- # different encodings).
- try:
- return '/'.join([
- Ctx().cvs_filename_decoder(piece).encode('utf8')
- for piece in path.split('/')
- ])
- except UnicodeError:
- raise FatalError(
- "Unable to convert a path '%s' to internal encoding.\n"
- "Consider rerunning with one or more '--encoding' parameters or\n"
- "with '--fallback-encoding'."
- % (path,))
-
- def _string_for_prop(self, name, value):
- """Return a property in the form needed for the dumpfile."""
-
- return 'K %d\n%s\nV %d\n%s\n' % (len(name), name, len(value), value)
-
- def start_commit(self, revnum, revprops):
- """Emit the start of SVN_COMMIT (an SVNCommit)."""
-
- self.revision = revnum
-
- # The start of a new commit typically looks like this:
- #
- # Revision-number: 1
- # Prop-content-length: 129
- # Content-length: 129
- #
- # K 7
- # svn:log
- # V 27
- # Log message for revision 1.
- # K 10
- # svn:author
- # V 7
- # jrandom
- # K 8
- # svn:date
- # V 27
- # 2003-04-22T22:57:58.132837Z
- # PROPS-END
- #
- # Notice that the length headers count everything -- not just the
- # length of the data but also the lengths of the lengths, including
- # the 'K ' or 'V ' prefixes.
- #
- # The reason there are both Prop-content-length and Content-length
- # is that the former includes just props, while the latter includes
- # everything. That's the generic header form for any entity in a
- # dumpfile. But since revisions only have props, the two lengths
- # are always the same for revisions.
-
- # Calculate the output needed for the property definitions.
- prop_names = revprops.keys()
- prop_names.sort()
- prop_strings = []
- for propname in prop_names:
- if revprops[propname] is not None:
- prop_strings.append(
- self._string_for_prop(propname, revprops[propname]))
-
- all_prop_strings = ''.join(prop_strings) + 'PROPS-END\n'
- total_len = len(all_prop_strings)
-
- # Print the revision header and revprops
- self.dumpfile.write(
- 'Revision-number: %d\n'
- 'Prop-content-length: %d\n'
- 'Content-length: %d\n'
- '\n'
- '%s'
- '\n'
- % (self.revision, total_len, total_len, all_prop_strings)
- )
-
- def end_commit(self):
- pass
-
- def _make_any_dir(self, path):
- """Emit the creation of directory PATH."""
-
- self.dumpfile.write(
- "Node-path: %s\n"
- "Node-kind: dir\n"
- "Node-action: add\n"
- "\n"
- "\n"
- % self._utf8_path(path)
- )
-
- def _register_basic_directory(self, path, create):
- """Register the creation of PATH if it is not already there.
-
- Create any parent directories that do not already exist. If
- CREATE is set, also create PATH if it doesn't already exist. This
- method should only be used for the LOD paths and the directories
- containing them, not for directories within an LOD path."""
-
- if path not in self._basic_directories:
- # Make sure that the parent directory is present:
- self._register_basic_directory(path_split(path)[0], True)
- if create:
- self._make_any_dir(path)
- self._basic_directories.add(path)
-
- def initialize_project(self, project):
- """Create any initial directories for the project.
-
- The trunk, tags, and branches directories directories are created
- the first time the project is seen. Be sure not to create parent
- directories that already exist (e.g., because two directories
- share part of their paths either within or across projects)."""
-
- for path in project.get_initial_directories():
- self._register_basic_directory(path, True)
-
- def initialize_lod(self, lod):
- lod_path = lod.get_path()
- if lod_path:
- self._register_basic_directory(lod_path, True)
-
- def mkdir(self, lod, cvs_directory):
- self._make_any_dir(lod.get_path(cvs_directory.cvs_path))
-
- def _add_or_change_path(self, s_item, op):
- """Emit the addition or change corresponding to S_ITEM.
-
- OP is either the constant OP_ADD or OP_CHANGE."""
-
- assert op in [OP_ADD, OP_CHANGE]
-
- # Convenience variables
- cvs_rev = s_item.cvs_rev
-
- # The property handling here takes advantage of an undocumented
- # but IMHO consistent feature of the Subversion dumpfile-loading
- # code. When a node's properties aren't mentioned (that is, the
- # "Prop-content-length:" header is absent, no properties are
- # listed at all, and there is no "PROPS-END\n" line) then no
- # change is made to the node's properties.
- #
- # This is consistent with the way dumpfiles behave w.r.t. text
- # content changes, so I'm comfortable relying on it. If you
- # commit a change to *just* the properties of some node that
- # already has text contents from a previous revision, then in the
- # dumpfile output for the prop change, no "Text-content-length:"
- # nor "Text-content-md5:" header will be present, and the text of
- # the file will not be given. But this does not cause the file's
- # text to be erased! It simply remains unchanged.
- #
- # This works out great for cvs2svn, due to lucky coincidences:
- #
- # For files, the only properties we ever set are set in the first
- # revision; all other revisions (including on branches) inherit
- # from that. After the first revision, we never change file
- # properties, therefore, there is no need to remember the full set
- # of properties on a given file once we've set it.
- #
- # For directories, the only property we set is "svn:ignore", and
- # while we may change it after the first revision, we always do so
- # based on the contents of a ".cvsignore" file -- in other words,
- # CVS is doing the remembering for us, so we still don't have to
- # preserve the previous value of the property ourselves.
-
- # Calculate the (sorted-by-name) property string and length, if any.
- if s_item.svn_props_changed:
- svn_props = s_item.svn_props
- prop_contents = ''
- prop_names = svn_props.keys()
- prop_names.sort()
- for pname in prop_names:
- pvalue = svn_props[pname]
- if pvalue is not None:
- prop_contents += self._string_for_prop(pname, pvalue)
- prop_contents += 'PROPS-END\n'
- props_header = 'Prop-content-length: %d\n' % len(prop_contents)
- else:
- prop_contents = ''
- props_header = ''
-
- # If the file has keywords, we must prevent CVS/RCS from expanding
- # the keywords because they must be unexpanded in the repository,
- # or Subversion will get confused.
- stream = self._revision_reader.get_content_stream(
- cvs_rev, suppress_keyword_substitution=s_item.has_keywords()
- )
-
- if Ctx().decode_apple_single:
- # Insert a filter to decode any files that are in AppleSingle
- # format:
- stream = get_maybe_apple_single_stream(stream)
-
- # Insert a filter to convert all EOLs to LFs if neccessary
-
- eol_style = s_item.svn_props.get('svn:eol-style', None)
- if eol_style:
- stream = LF_EOL_Filter(stream, eol_style)
-
- buf = None
-
- # treat .cvsignore as a directory property
- dir_path, basename = path_split(cvs_rev.get_svn_path())
- if basename == '.cvsignore':
- buf = stream.read()
- ignore_vals = generate_ignores(buf)
- ignore_contents = '\n'.join(ignore_vals)
- if ignore_contents:
- ignore_contents += '\n'
- ignore_contents = ('K 10\nsvn:ignore\nV %d\n%s\n' % \
- (len(ignore_contents), ignore_contents))
- ignore_contents += 'PROPS-END\n'
- ignore_len = len(ignore_contents)
-
- # write headers, then props
- self.dumpfile.write(
- 'Node-path: %s\n'
- 'Node-kind: dir\n'
- 'Node-action: change\n'
- 'Prop-content-length: %d\n'
- 'Content-length: %d\n'
- '\n'
- '%s'
- % (self._utf8_path(dir_path),
- ignore_len, ignore_len, ignore_contents)
- )
- if not Ctx().keep_cvsignore:
- stream.close()
- return
-
- self.dumpfile.write(
- 'Node-path: %s\n'
- 'Node-kind: file\n'
- 'Node-action: %s\n'
- '%s' # no property header if no props
- % (self._utf8_path(cvs_rev.get_svn_path()), op, props_header)
- )
-
- pos = self.dumpfile.tell()
-
- content_header_fmt = (
- 'Text-content-length: %16d\n'
- 'Text-content-md5: %32s\n'
- 'Content-length: %16d\n'
- '\n'
- )
-
- self.dumpfile.write(content_header_fmt % (0, '', 0,))
-
- if prop_contents:
- self.dumpfile.write(prop_contents)
-
- # Insert the rev contents, calculating length and checksum as we go.
- checksum = md5()
- length = 0
- if buf is None:
- buf = stream.read(config.PIPE_READ_SIZE)
- while buf != '':
- checksum.update(buf)
- length += len(buf)
- self.dumpfile.write(buf)
- buf = stream.read(config.PIPE_READ_SIZE)
-
- stream.close()
-
- # Go back to overwrite the length and checksum headers with the
- # correct values. The content length is the length of property
- # data, text data, and any metadata around/inside around them:
- self.dumpfile.seek(pos, 0)
- self.dumpfile.write(
- content_header_fmt
- % (length, checksum.hexdigest(), length + len(prop_contents),)
- )
-
- # Jump back to the end of the stream
- self.dumpfile.seek(0, 2)
-
- # This record is done (write two newlines -- one to terminate
- # contents that weren't themselves newline-termination, one to
- # provide a blank line for readability.
- self.dumpfile.write('\n\n')
-
- def add_path(self, s_item):
- """Emit the addition corresponding to S_ITEM, an SVNCommitItem."""
-
- self._add_or_change_path(s_item, OP_ADD)
-
- def change_path(self, s_item):
- """Emit the change corresponding to S_ITEM, an SVNCommitItem."""
-
- self._add_or_change_path(s_item, OP_CHANGE)
-
- def delete_lod(self, lod):
- """Emit the deletion of LOD."""
-
- self.dumpfile.write(
- 'Node-path: %s\n'
- 'Node-action: delete\n'
- '\n'
- % (self._utf8_path(lod.get_path()),)
- )
- self._basic_directories.remove(lod.get_path())
-
- def delete_path(self, lod, cvs_path):
- dir_path, basename = path_split(lod.get_path(cvs_path.get_cvs_path()))
- if basename == '.cvsignore':
- # When a .cvsignore file is deleted, the directory's svn:ignore
- # property needs to be deleted.
- ignore_contents = 'PROPS-END\n'
- ignore_len = len(ignore_contents)
-
- # write headers, then props
- self.dumpfile.write(
- 'Node-path: %s\n'
- 'Node-kind: dir\n'
- 'Node-action: change\n'
- 'Prop-content-length: %d\n'
- 'Content-length: %d\n'
- '\n'
- '%s'
- % (self._utf8_path(dir_path),
- ignore_len, ignore_len, ignore_contents)
- )
- if not Ctx().keep_cvsignore:
- return
-
- self.dumpfile.write(
- 'Node-path: %s\n'
- 'Node-action: delete\n'
- '\n'
- % (self._utf8_path(lod.get_path(cvs_path.cvs_path)),)
- )
-
- def copy_lod(self, src_lod, dest_lod, src_revnum):
- # Register the main LOD directory, and create parent directories
- # as needed:
- self._register_basic_directory(dest_lod.get_path(), False)
-
- self.dumpfile.write(
- 'Node-path: %s\n'
- 'Node-kind: dir\n'
- 'Node-action: add\n'
- 'Node-copyfrom-rev: %d\n'
- 'Node-copyfrom-path: %s\n'
- '\n'
- % (self._utf8_path(dest_lod.get_path()),
- src_revnum, self._utf8_path(src_lod.get_path()))
- )
-
- def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum):
- if isinstance(cvs_path, CVSFile):
- node_kind = 'file'
- if cvs_path.basename == '.cvsignore':
- # FIXME: Here we have to adjust the containing directory's
- # svn:ignore property to reflect the addition of the
- # .cvsignore file to the LOD! This is awkward because we
- # don't have the contents of the .cvsignore file available.
- if not Ctx().keep_cvsignore:
- return
- elif isinstance(cvs_path, CVSDirectory):
- node_kind = 'dir'
- else:
- raise InternalError()
-
- self.dumpfile.write(
- 'Node-path: %s\n'
- 'Node-kind: %s\n'
- 'Node-action: add\n'
- 'Node-copyfrom-rev: %d\n'
- 'Node-copyfrom-path: %s\n'
- '\n'
- % (
- self._utf8_path(dest_lod.get_path(cvs_path.cvs_path)),
- node_kind,
- src_revnum,
- self._utf8_path(src_lod.get_path(cvs_path.cvs_path))
- )
- )
-
- def finish(self):
- """Perform any cleanup necessary after all revisions have been
- committed."""
-
- self.dumpfile.close()
-
-
-def generate_ignores(raw_ignore_val):
- ignore_vals = [ ]
- for ignore in raw_ignore_val.split():
- # Reset the list if we encounter a '!'
- # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
- if ignore == '!':
- ignore_vals = [ ]
- else:
- ignore_vals.append(ignore)
- return ignore_vals
-
-
-class LF_EOL_Filter:
- """Filter a stream and convert all end-of-line markers (CRLF, CR or LF)
- into the appropriate canonical eol style."""
-
- eol_style_replacements = {
- 'LF' : '\n',
- 'CR' : '\r',
- 'CRLF' : '\r\n',
- 'native' : '\n',
- }
-
- def __init__(self, stream, eol_style):
- self.stream = stream
- self.replacement = self.eol_style_replacements[eol_style]
- self.carry_cr = False
- self.eof = False
-
- def read(self, size=-1):
- while True:
- buf = self.stream.read(size)
- self.eof = len(buf) == 0
- if self.carry_cr:
- buf = '\r' + buf
- self.carry_cr = False
- if not self.eof and buf[-1] == '\r':
- self.carry_cr = True
- buf = buf[:-1]
- buf = buf.replace('\r\n', '\n')
- buf = buf.replace('\r', '\n')
- if self.replacement != '\n':
- buf = buf.replace('\n', self.replacement)
- if buf or self.eof:
- return buf
-
- def close(self):
- self.stream.close()
- self.stream = None
-
-
diff --git a/cvs2svn_lib/fill_source.py b/cvs2svn_lib/fill_source.py
deleted file mode 100644
index 2bb8e4c..0000000
--- a/cvs2svn_lib/fill_source.py
+++ /dev/null
@@ -1,192 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains classes describing the sources of symbol fills."""
-
-
-from cvs2svn_lib.common import InternalError
-from cvs2svn_lib.common import FatalError
-from cvs2svn_lib.common import SVN_INVALID_REVNUM
-from cvs2svn_lib.svn_revision_range import SVNRevisionRange
-from cvs2svn_lib.svn_revision_range import RevisionScores
-
-
-class FillSource:
- """Representation of a fill source.
-
- A FillSource keeps track of the paths that have to be filled in a
- particular symbol fill.
-
- This class holds a SVNRevisionRange instance for each CVSFile that
- has to be filled within the subtree of the repository rooted at
- self.cvs_path. The SVNRevisionRange objects are stored in a tree
- in which the directory nodes are dictionaries mapping CVSPaths to
- subnodes and the leaf nodes are the SVNRevisionRange objects telling
- for what source_lod and what range of revisions the leaf could serve
- as a source.
-
- FillSource objects are able to compute the score for arbitrary
- source LODs and source revision numbers.
-
- These objects are used by the symbol filler in SVNOutputOption."""
-
- def __init__(self, cvs_path, symbol, node_tree):
- """Create a fill source.
-
- The best LOD and SVN REVNUM to use as the copy source can be
- determined by calling compute_best_source().
-
- Members:
-
- cvs_path -- (CVSPath): the CVSPath described by this FillSource.
-
- _symbol -- (Symbol) the symbol to be filled.
-
- _node_tree -- (dict) a tree stored as a map { CVSPath : node },
- where subnodes have the same form. Leaves are
- SVNRevisionRange instances telling the source_lod and range
- of SVN revision numbers from which the CVSPath can be
- copied.
-
- """
-
- self.cvs_path = cvs_path
- self._symbol = symbol
- self._node_tree = node_tree
-
- def _set_node(self, cvs_file, svn_revision_range):
- parent_node = self._get_node(cvs_file.parent_directory, create=True)
- if cvs_file in parent_node:
- raise InternalError(
- '%s appeared twice in sources for %s' % (cvs_file, self._symbol)
- )
- parent_node[cvs_file] = svn_revision_range
-
- def _get_node(self, cvs_path, create=False):
- if cvs_path == self.cvs_path:
- return self._node_tree
- else:
- parent_node = self._get_node(cvs_path.parent_directory, create=create)
- try:
- return parent_node[cvs_path]
- except KeyError:
- if create:
- node = {}
- parent_node[cvs_path] = node
- return node
- else:
- raise
-
- def compute_best_source(self, preferred_source):
- """Determine the best source_lod and subversion revision number to copy.
-
- Return the best source found, as an SVNRevisionRange instance. If
- PREFERRED_SOURCE is not None and its opening is among the sources
- with the best scores, return it; otherwise, return the oldest such
- revision on the first such source_lod (ordered by the natural LOD
- sort order). The return value's source_lod is the best LOD to
- copy from, and its opening_revnum is the best SVN revision."""
-
- # Aggregate openings and closings from our rev tree
- svn_revision_ranges = self._get_revision_ranges(self._node_tree)
-
- # Score the lists
- revision_scores = RevisionScores(svn_revision_ranges)
-
- best_source_lod, best_revnum, best_score = \
- revision_scores.get_best_revnum()
-
- if (
- preferred_source is not None
- and revision_scores.get_score(preferred_source) == best_score
- ):
- best_source_lod = preferred_source.source_lod
- best_revnum = preferred_source.opening_revnum
-
- if best_revnum == SVN_INVALID_REVNUM:
- raise FatalError(
- "failed to find a revision to copy from when copying %s"
- % self._symbol.name
- )
-
- return SVNRevisionRange(best_source_lod, best_revnum)
-
- def _get_revision_ranges(self, node):
- """Return a list of all the SVNRevisionRanges at and under NODE.
-
- Include duplicates. This is a helper method used by
- compute_best_source()."""
-
- if isinstance(node, SVNRevisionRange):
- # It is a leaf node.
- return [ node ]
- else:
- # It is an intermediate node.
- revision_ranges = []
- for key, subnode in node.items():
- revision_ranges.extend(self._get_revision_ranges(subnode))
- return revision_ranges
-
- def get_subsources(self):
- """Generate (CVSPath, FillSource) for all direct subsources."""
-
- if not isinstance(self._node_tree, SVNRevisionRange):
- for cvs_path, node in self._node_tree.items():
- fill_source = FillSource(cvs_path, self._symbol, node)
- yield (cvs_path, fill_source)
-
- def get_subsource_map(self):
- """Return the map {CVSPath : FillSource} of direct subsources."""
-
- src_entries = {}
-
- for (cvs_path, fill_subsource) in self.get_subsources():
- src_entries[cvs_path] = fill_subsource
-
- return src_entries
-
- def __str__(self):
- """For convenience only. The format is subject to change at any time."""
-
- return '%s(%s:%s)' % (
- self.__class__.__name__, self._symbol, self.cvs_path,
- )
-
- def __repr__(self):
- """For convenience only. The format is subject to change at any time."""
-
- return '%s%r' % (self, self._node_tree,)
-
-
-def get_source_set(symbol, range_map):
- """Return a FillSource describing the fill sources for RANGE_MAP.
-
- SYMBOL is either a Branch or a Tag. RANGE_MAP is a map { CVSSymbol
- : SVNRevisionRange } as returned by
- SymbolingsReader.get_range_map().
-
- Use the SVNRevisionRanges from RANGE_MAP to create a FillSource
- instance describing the sources for filling SYMBOL."""
-
- root_cvs_directory = symbol.project.get_root_cvs_directory()
- fill_source = FillSource(root_cvs_directory, symbol, {})
-
- for cvs_symbol, svn_revision_range in range_map.items():
- fill_source._set_node(cvs_symbol.cvs_file, svn_revision_range)
-
- return fill_source
-
-
diff --git a/cvs2svn_lib/fulltext_revision_recorder.py b/cvs2svn_lib/fulltext_revision_recorder.py
deleted file mode 100644
index ad057b7..0000000
--- a/cvs2svn_lib/fulltext_revision_recorder.py
+++ /dev/null
@@ -1,127 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2007-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""An abstract class that contructs file contents during CollectRevsPass.
-
-It calls its record_fulltext() method with the full text of every
-revision. This method should be overridden to do something with the
-fulltext and possibly return a revision_recorder_token."""
-
-
-from cvs2svn_lib.revision_manager import RevisionRecorder
-
-
-class FulltextRevisionRecorder:
- """Similar to a RevisionRecorder, but it requires the fulltext."""
-
- def register_artifacts(self, which_pass):
- pass
-
- def start(self):
- pass
-
- def start_file(self, cvs_file_items):
- pass
-
- def record_fulltext(self, cvs_rev, log, fulltext):
- """Record the fulltext for CVS_REV.
-
- CVS_REV has the log message LOG and the fulltext FULLTEXT. This
- method should be overridden to do something sensible with them."""
-
- raise NotImplementedError()
-
- def finish_file(self, cvs_file_items):
- pass
-
- def finish(self):
- pass
-
-
-class FulltextRevisionRecorderAdapter(RevisionRecorder):
- """Reconstruct the fulltext and pass it to a FulltextRevisionRecorder.
-
- This class implements RevisionRecorder (so it can be passed directly
- to CollectRevsPass). But it doesn't actually record anything.
- Instead, it reconstructs the fulltext of each revision, and passes
- the fulltext to a fulltext_revision_recorder."""
-
- def __init__(self, fulltext_revision_recorder):
- RevisionRecorder.__init__(self)
- self.fulltext_revision_recorder = fulltext_revision_recorder
-
- def register_artifacts(self, which_pass):
- self.fulltext_revision_recorder.register_artifacts(which_pass)
-
- def start(self):
- self.fulltext_revision_recorder.start()
-
- def start_file(self, cvs_file_items):
- self.fulltext_revision_recorder.start_file(cvs_file_items)
-
- def record_text(self, cvs_rev, log, text):
- """This method should be overwridden.
-
- It should determine the fulltext of CVS_REV, then pass it to
- self.fulltext_revision_recorder.record_fulltext() and return the
- result."""
-
- raise NotImplementedError()
-
- def finish_file(self, cvs_file_items):
- self.fulltext_revision_recorder.finish_file(cvs_file_items)
-
- def finish(self):
- self.fulltext_revision_recorder.finish()
-
-
-class SimpleFulltextRevisionRecorderAdapter(FulltextRevisionRecorderAdapter):
- """Reconstruct the fulltext using a RevisionReader.
-
- To create the fulltext, this class simply uses a RevisionReader (for
- example, RCSRevisionReader or CVSRevisionReader). This is not quite
- as wasteful as using one of these RevisionReaders in OutputPass,
- because the same RCS file will be read over and over (and so
- presumably stay in the disk cache). But it is still pretty silly,
- considering that we have all the RCS deltas available to us."""
-
- def __init__(self, revision_reader, fulltext_revision_recorder):
- FulltextRevisionRecorderAdapter.__init__(self, fulltext_revision_recorder)
- self.revision_reader = revision_reader
-
- def register_artifacts(self, which_pass):
- FulltextRevisionRecorderAdapter.register_artifacts(self, which_pass)
- self.revision_reader.register_artifacts(which_pass)
-
- def start(self):
- FulltextRevisionRecorderAdapter.start(self)
- self.revision_reader.start()
-
- def record_text(self, cvs_rev, log, text):
- # FIXME: We have to decide what to do about keyword substitution
- # and eol_style here:
- fulltext = self.revision_reader.get_content_stream(
- cvs_rev, suppress_keyword_substitution=False
- ).read()
- return self.fulltext_revision_recorder.record_fulltext(
- cvs_rev, log, fulltext
- )
-
- def finish(self):
- FulltextRevisionRecorderAdapter.finish(self)
- self.revision_reader.finish()
-
-
diff --git a/cvs2svn_lib/git_output_option.py b/cvs2svn_lib/git_output_option.py
deleted file mode 100644
index a1e46b9..0000000
--- a/cvs2svn_lib/git_output_option.py
+++ /dev/null
@@ -1,658 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2007-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""Classes for outputting the converted repository to git.
-
-For information about the format allowed by git-fast-import, see:
-
- http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html
-
-"""
-
-import bisect
-
-from cvs2svn_lib import config
-from cvs2svn_lib.common import InternalError
-from cvs2svn_lib.common import FatalError
-from cvs2svn_lib.log import Log
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.artifact_manager import artifact_manager
-from cvs2svn_lib.openings_closings import SymbolingsReader
-from cvs2svn_lib.symbol import Trunk
-from cvs2svn_lib.symbol import Branch
-from cvs2svn_lib.symbol import Tag
-from cvs2svn_lib.cvs_item import CVSRevisionAdd
-from cvs2svn_lib.cvs_item import CVSRevisionChange
-from cvs2svn_lib.cvs_item import CVSRevisionDelete
-from cvs2svn_lib.cvs_item import CVSRevisionNoop
-from cvs2svn_lib.cvs_item import CVSSymbol
-from cvs2svn_lib.output_option import OutputOption
-from cvs2svn_lib.svn_revision_range import RevisionScores
-from cvs2svn_lib.repository_mirror import RepositoryMirror
-from cvs2svn_lib.key_generator import KeyGenerator
-
-
-# The branch name to use for the "tag fixup branches". The
-# git-fast-import documentation suggests using 'TAG_FIXUP' (outside of
-# the refs/heads namespace), but this is currently broken. Use a name
-# containing '.', which is not allowed in CVS symbols, to avoid
-# conflicts (though of course a conflict could still result if the
-# user requests symbol transformations).
-FIXUP_BRANCH_NAME = 'refs/heads/TAG.FIXUP'
-
-
-class ExpectedDirectoryError(Exception):
- """A file was found where a directory was expected."""
-
- pass
-
-
-class ExpectedFileError(Exception):
- """A directory was found where a file was expected."""
-
- pass
-
-
-class GitRevisionWriter(object):
- def register_artifacts(self, which_pass):
- pass
-
- def start(self, f, mirror):
- self.f = f
- self._mirror = mirror
-
- def _modify_file(self, cvs_item, post_commit):
- raise NotImplementedError()
-
- def _mkdir_p(self, cvs_directory, lod):
- """Make sure that CVS_DIRECTORY exists in LOD.
-
- If not, create it. Return the node for CVS_DIRECTORY."""
-
- try:
- node = self._mirror.get_current_lod_directory(lod)
- except KeyError:
- node = self._mirror.add_lod(lod)
-
- for sub_path in cvs_directory.get_ancestry()[1:]:
- try:
- node = node[sub_path]
- except KeyError:
- node = node.mkdir(sub_path)
- if node is None:
- raise ExpectedDirectoryError(
- 'File found at \'%s\' where directory was expected.' % (sub_path,)
- )
-
- return node
-
- def add_file(self, cvs_rev, post_commit):
- cvs_file = cvs_rev.cvs_file
- if post_commit:
- lod = cvs_file.project.get_trunk()
- else:
- lod = cvs_rev.lod
- parent_node = self._mkdir_p(cvs_file.parent_directory, lod)
- parent_node.add_file(cvs_file)
- self._modify_file(cvs_rev, post_commit)
-
- def modify_file(self, cvs_rev, post_commit):
- cvs_file = cvs_rev.cvs_file
- if post_commit:
- lod = cvs_file.project.get_trunk()
- else:
- lod = cvs_rev.lod
- if self._mirror.get_current_path(cvs_file, lod) is not None:
- raise ExpectedFileError(
- 'Directory found at \'%s\' where file was expected.' % (cvs_file,)
- )
- self._modify_file(cvs_rev, post_commit)
-
- def delete_file(self, cvs_rev, post_commit):
- cvs_file = cvs_rev.cvs_file
- if post_commit:
- lod = cvs_file.project.get_trunk()
- else:
- lod = cvs_rev.lod
- parent_node = self._mirror.get_current_path(
- cvs_file.parent_directory, lod
- )
- if parent_node[cvs_file] is not None:
- raise ExpectedFileError(
- 'Directory found at \'%s\' where file was expected.' % (cvs_file,)
- )
- del parent_node[cvs_file]
- self.f.write('D %s\n' % (cvs_rev.cvs_file.cvs_path,))
-
- def process_revision(self, cvs_rev, post_commit):
- if isinstance(cvs_rev, CVSRevisionAdd):
- self.add_file(cvs_rev, post_commit)
- elif isinstance(cvs_rev, CVSRevisionChange):
- self.modify_file(cvs_rev, post_commit)
- elif isinstance(cvs_rev, CVSRevisionDelete):
- self.delete_file(cvs_rev, post_commit)
- elif isinstance(cvs_rev, CVSRevisionNoop):
- pass
- else:
- raise InternalError('Unexpected CVSRevision type: %s' % (cvs_rev,))
-
- def branch_file(self, cvs_symbol):
- cvs_file = cvs_symbol.cvs_file
- parent_node = self._mkdir_p(cvs_file.parent_directory, cvs_symbol.symbol)
- parent_node.add_file(cvs_file)
- self._modify_file(cvs_symbol, post_commit=False)
-
- def finish(self):
- del self._mirror
- del self.f
-
-
-class GitRevisionMarkWriter(GitRevisionWriter):
- def _modify_file(self, cvs_item, post_commit):
- if cvs_item.cvs_file.executable:
- mode = '100755'
- else:
- mode = '100644'
-
- self.f.write(
- 'M %s :%d %s\n'
- % (mode, cvs_item.revision_recorder_token,
- cvs_item.cvs_file.cvs_path,)
- )
-
-
-class GitRevisionInlineWriter(GitRevisionWriter):
- def __init__(self, revision_reader):
- self.revision_reader = revision_reader
-
- def register_artifacts(self, which_pass):
- GitRevisionWriter.register_artifacts(self, which_pass)
- self.revision_reader.register_artifacts(which_pass)
-
- def start(self, f, mirror):
- GitRevisionWriter.start(self, f, mirror)
- self.revision_reader.start()
-
- def _modify_file(self, cvs_item, post_commit):
- if cvs_item.cvs_file.executable:
- mode = '100755'
- else:
- mode = '100644'
-
- self.f.write(
- 'M %s inline %s\n'
- % (mode, cvs_item.cvs_file.cvs_path,)
- )
-
- if isinstance(cvs_item, CVSSymbol):
- cvs_rev = cvs_item.get_cvs_revision_source(Ctx()._cvs_items_db)
- else:
- cvs_rev = cvs_item
-
- # FIXME: We have to decide what to do about keyword substitution
- # and eol_style here:
- fulltext = self.revision_reader.get_content_stream(
- cvs_rev, suppress_keyword_substitution=False
- ).read()
-
- self.f.write('data %d\n' % (len(fulltext),))
- self.f.write(fulltext)
- self.f.write('\n')
-
- def finish(self):
- GitRevisionWriter.finish(self)
- self.revision_reader.finish()
-
-
-def get_chunks(iterable, chunk_size):
- """Generate lists containing chunks of the output of ITERABLE.
-
- Each list contains at most CHUNK_SIZE items. If CHUNK_SIZE is None,
- yield the whole contents of ITERABLE in one list."""
-
- if chunk_size is None:
- yield list(iterable)
- else:
- it = iter(iterable)
- while True:
- # If this call to it.next() raises StopIteration, then we have
- # no more chunks to emit, so simply pass the exception through:
- chunk = [it.next()]
-
- # Now try filling the rest of the chunk:
- try:
- while len(chunk) < chunk_size:
- chunk.append(it.next())
- except StopIteration:
- # The iterator was exhausted while filling chunk, but chunk
- # contains at least one element. Yield it, then we're done.
- yield chunk
- break
-
- # Yield the full chunk then continue with the next chunk:
- yield chunk
- del chunk
-
-
-class GitOutputOption(OutputOption):
- """An OutputOption that outputs to a git-fast-import formatted file.
-
- Members:
-
- dump_filename -- (string) the name of the file to which the
- git-fast-import commands for defining revisions will be
- written.
-
- author_transforms -- a map {cvsauthor : (fullname, email)} from
- CVS author names to git full name and email address. All of
- the contents are 8-bit strings encoded as UTF-8.
-
- """
-
- # The first mark number used for git-fast-import commit marks. This
- # value needs to be large to avoid conflicts with blob marks.
- _first_commit_mark = 1000000000
-
- def __init__(
- self, dump_filename, revision_writer,
- max_merges=None, author_transforms=None,
- ):
- """Constructor.
-
- DUMP_FILENAME is the name of the file to which the git-fast-import
- commands for defining revisions should be written. (Please note
- that depending on the style of revision writer, the actual file
- contents might not be written to this file.)
-
- REVISION_WRITER is a GitRevisionWriter that is used to output
- either the content of revisions or a mark that was previously used
- to label a blob.
-
- MAX_MERGES can be set to an integer telling the maximum number of
- parents that can be merged into a commit at once (aside from the
- natural parent). If it is set to None, then there is no limit.
-
- AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
- CVS author names to git full name and email address. All of the
- contents should either be Unicode strings or 8-bit strings encoded
- as UTF-8.
-
- """
-
- self.dump_filename = dump_filename
- self.revision_writer = revision_writer
- self.max_merges = max_merges
-
- def to_utf8(s):
- if isinstance(s, unicode):
- return s.encode('utf8')
- else:
- return s
-
- self.author_transforms = {}
- if author_transforms is not None:
- for (cvsauthor, (name, email,)) in author_transforms.iteritems():
- cvsauthor = to_utf8(cvsauthor)
- name = to_utf8(name)
- email = to_utf8(email)
- self.author_transforms[cvsauthor] = (name, email,)
-
- self._mirror = RepositoryMirror()
-
- self._mark_generator = KeyGenerator(GitOutputOption._first_commit_mark)
-
- def register_artifacts(self, which_pass):
- # These artifacts are needed for SymbolingsReader:
- artifact_manager.register_temp_file_needed(
- config.SYMBOL_OPENINGS_CLOSINGS_SORTED, which_pass
- )
- artifact_manager.register_temp_file_needed(
- config.SYMBOL_OFFSETS_DB, which_pass
- )
- self.revision_writer.register_artifacts(which_pass)
- self._mirror.register_artifacts(which_pass)
-
- def check(self):
- if Ctx().cross_project_commits:
- raise FatalError(
- 'Git output is not supported with cross-project commits'
- )
- if Ctx().cross_branch_commits:
- raise FatalError(
- 'Git output is not supported with cross-branch commits'
- )
- if Ctx().username is None:
- raise FatalError(
- 'Git output requires a default commit username'
- )
-
- def check_symbols(self, symbol_map):
- # FIXME: What constraints does git impose on symbols?
- pass
-
- def setup(self, svn_rev_count):
- self._symbolings_reader = SymbolingsReader()
- self.f = open(self.dump_filename, 'wb')
-
- # The youngest revnum that has been committed so far:
- self._youngest = 0
-
- # A map {lod : [(revnum, mark)]} giving each of the revision
- # numbers in which there was a commit to lod, and the mark active
- # at the end of the revnum.
- self._marks = {}
-
- self._mirror.open()
- self.revision_writer.start(self.f, self._mirror)
-
- def _create_commit_mark(self, lod, revnum):
- mark = self._mark_generator.gen_id()
- self._set_lod_mark(lod, revnum, mark)
- return mark
-
- def _set_lod_mark(self, lod, revnum, mark):
- """Record MARK as the status of LOD for REVNUM.
-
- If there is already an entry for REVNUM, overwrite it. If not,
- append a new entry to the self._marks list for LOD."""
-
- assert revnum >= self._youngest
- entry = (revnum, mark)
- try:
- modifications = self._marks[lod]
- except KeyError:
- # This LOD hasn't appeared before; create a new list and add the
- # entry:
- self._marks[lod] = [entry]
- else:
- # A record exists, so it necessarily has at least one element:
- if modifications[-1][0] == revnum:
- modifications[-1] = entry
- else:
- modifications.append(entry)
- self._youngest = revnum
-
- def _get_author(self, svn_commit):
- """Return the author to be used for SVN_COMMIT.
-
- Return the author in the form needed by git; that is, 'foo <bar>'."""
-
- author = svn_commit.get_author()
- (name, email,) = self.author_transforms.get(author, (author, author,))
- return '%s <%s>' % (name, email,)
-
- @staticmethod
- def _get_log_msg(svn_commit):
- return svn_commit.get_log_msg()
-
- def process_initial_project_commit(self, svn_commit):
- self._mirror.start_commit(svn_commit.revnum)
- self._mirror.end_commit()
-
- def process_primary_commit(self, svn_commit):
- author = self._get_author(svn_commit)
- log_msg = self._get_log_msg(svn_commit)
-
- lods = set()
- for cvs_rev in svn_commit.get_cvs_items():
- lods.add(cvs_rev.lod)
- if len(lods) != 1:
- raise InternalError('Commit affects %d LODs' % (len(lods),))
- lod = lods.pop()
-
- self._mirror.start_commit(svn_commit.revnum)
- if isinstance(lod, Trunk):
- # FIXME: is this correct?:
- self.f.write('commit refs/heads/master\n')
- else:
- self.f.write('commit refs/heads/%s\n' % (lod.name,))
- self.f.write(
- 'mark :%d\n'
- % (self._create_commit_mark(lod, svn_commit.revnum),)
- )
- self.f.write(
- 'committer %s %d +0000\n' % (author, svn_commit.date,)
- )
- self.f.write('data %d\n' % (len(log_msg),))
- self.f.write('%s\n' % (log_msg,))
- for cvs_rev in svn_commit.get_cvs_items():
- self.revision_writer.process_revision(cvs_rev, post_commit=False)
-
- self.f.write('\n')
- self._mirror.end_commit()
-
- def process_post_commit(self, svn_commit):
- author = self._get_author(svn_commit)
- log_msg = self._get_log_msg(svn_commit)
-
- source_lods = set()
- for cvs_rev in svn_commit.cvs_revs:
- source_lods.add(cvs_rev.lod)
- if len(source_lods) != 1:
- raise InternalError('Commit is from %d LODs' % (len(source_lods),))
- source_lod = source_lods.pop()
-
- self._mirror.start_commit(svn_commit.revnum)
- # FIXME: is this correct?:
- self.f.write('commit refs/heads/master\n')
- self.f.write(
- 'mark :%d\n'
- % (self._create_commit_mark(None, svn_commit.revnum),)
- )
- self.f.write(
- 'committer %s %d +0000\n' % (author, svn_commit.date,)
- )
- self.f.write('data %d\n' % (len(log_msg),))
- self.f.write('%s\n' % (log_msg,))
- self.f.write(
- 'merge :%d\n'
- % (self._get_source_mark(source_lod, svn_commit.revnum),)
- )
- for cvs_rev in svn_commit.cvs_revs:
- self.revision_writer.process_revision(cvs_rev, post_commit=True)
-
- self.f.write('\n')
- self._mirror.end_commit()
-
- def _get_source_groups(self, svn_commit):
- """Return groups of sources for SVN_COMMIT.
-
- SVN_COMMIT is an instance of SVNSymbolCommit. Yield tuples
- (source_lod, svn_revnum, cvs_symbols) where source_lod is the line
- of development and svn_revnum is the revision that should serve as
- a source, and cvs_symbols is a list of CVSSymbolItems that can be
- copied from that source. The groups are returned in arbitrary
- order."""
-
- # Get a map {CVSSymbol : SVNRevisionRange}:
- range_map = self._symbolings_reader.get_range_map(svn_commit)
-
- # range_map, split up into one map per LOD; i.e., {LOD :
- # {CVSSymbol : SVNRevisionRange}}:
- lod_range_maps = {}
-
- for (cvs_symbol, range) in range_map.iteritems():
- lod_range_map = lod_range_maps.get(range.source_lod)
- if lod_range_map is None:
- lod_range_map = {}
- lod_range_maps[range.source_lod] = lod_range_map
- lod_range_map[cvs_symbol] = range
-
- # Sort the sources so that the branch that serves most often as
- # parent is processed first:
- lod_ranges = lod_range_maps.items()
- lod_ranges.sort(
- lambda (lod1,lod_range_map1),(lod2,lod_range_map2):
- -cmp(len(lod_range_map1), len(lod_range_map2)) or cmp(lod1, lod2)
- )
-
- for (lod, lod_range_map) in lod_ranges:
- while lod_range_map:
- revision_scores = RevisionScores(lod_range_map.values())
- (source_lod, revnum, score) = revision_scores.get_best_revnum()
- assert source_lod == lod
- cvs_symbols = []
- for (cvs_symbol, range) in lod_range_map.items():
- if revnum in range:
- cvs_symbols.append(cvs_symbol)
- del lod_range_map[cvs_symbol]
- yield (lod, revnum, cvs_symbols)
-
- def _get_all_files(self, node):
- """Generate all of the CVSFiles under NODE."""
-
- for cvs_path in node:
- subnode = node[cvs_path]
- if subnode is None:
- yield cvs_path
- else:
- for sub_cvs_path in self._get_all_files(subnode):
- yield sub_cvs_path
-
- def _is_simple_copy(self, svn_commit, source_groups):
- """Return True iff SVN_COMMIT can be created as a simple copy.
-
- SVN_COMMIT is an SVNTagCommit. Return True iff it can be created
- as a simple copy from an existing revision (i.e., if the fixup
- branch can be avoided for this tag creation)."""
-
- # The first requirement is that there be exactly one source:
- if len(source_groups) != 1:
- return False
-
- (source_lod, svn_revnum, cvs_symbols) = source_groups[0]
-
- # The second requirement is that the destination LOD not already
- # exist:
- try:
- self._mirror.get_current_lod_directory(svn_commit.symbol)
- except KeyError:
- # The LOD doesn't already exist. This is good.
- pass
- else:
- # The LOD already exists. It cannot be created by a copy.
- return False
-
- # The third requirement is that the source LOD contains exactly
- # the same files as we need to add to the symbol:
- try:
- source_node = self._mirror.get_old_lod_directory(source_lod, svn_revnum)
- except KeyError:
- raise InternalError('Source %r does not exist' % (source_lod,))
- return (
- set([cvs_symbol.cvs_file for cvs_symbol in cvs_symbols])
- == set(self._get_all_files(source_node))
- )
-
- def _get_source_mark(self, source_lod, revnum):
- """Return the mark active on SOURCE_LOD at the end of REVNUM."""
-
- modifications = self._marks[source_lod]
- i = bisect.bisect_left(modifications, (revnum + 1,)) - 1
- (revnum, mark) = modifications[i]
- return mark
-
- def _process_symbol_commit(
- self, svn_commit, git_branch, source_groups, mark
- ):
- author = self._get_author(svn_commit)
- log_msg = self._get_log_msg(svn_commit)
-
- self.f.write('commit %s\n' % (git_branch,))
- self.f.write('mark :%d\n' % (mark,))
- self.f.write('committer %s %d +0000\n' % (author, svn_commit.date,))
- self.f.write('data %d\n' % (len(log_msg),))
- self.f.write('%s\n' % (log_msg,))
-
- for (source_lod, source_revnum, cvs_symbols,) in source_groups:
- self.f.write(
- 'merge :%d\n'
- % (self._get_source_mark(source_lod, source_revnum),)
- )
-
- for (source_lod, source_revnum, cvs_symbols,) in source_groups:
- for cvs_symbol in cvs_symbols:
- self.revision_writer.branch_file(cvs_symbol)
-
- self.f.write('\n')
-
- def process_branch_commit(self, svn_commit):
- self._mirror.start_commit(svn_commit.revnum)
- source_groups = list(self._get_source_groups(svn_commit))
- for groups in get_chunks(source_groups, self.max_merges):
- self._process_symbol_commit(
- svn_commit, 'refs/heads/%s' % (svn_commit.symbol.name,),
- groups,
- self._create_commit_mark(svn_commit.symbol, svn_commit.revnum),
- )
- self._mirror.end_commit()
-
- def _set_symbol(self, symbol, mark):
- if isinstance(symbol, Branch):
- category = 'heads'
- elif isinstance(symbol, Tag):
- category = 'tags'
- else:
- raise InternalError()
- self.f.write('reset refs/%s/%s\n' % (category, symbol.name,))
- self.f.write('from :%d\n' % (mark,))
-
- def process_tag_commit(self, svn_commit):
- # FIXME: For now we create a fixup branch with the same name as
- # the tag, then the tag. We never delete the fixup branch. Also,
- # a fixup branch is created even if the tag could be created from
- # a single source.
- self._mirror.start_commit(svn_commit.revnum)
-
- source_groups = list(self._get_source_groups(svn_commit))
- if self._is_simple_copy(svn_commit, source_groups):
- (source_lod, source_revnum, cvs_symbols) = source_groups[0]
- Log().debug(
- '%s will be created via a simple copy from %s:r%d'
- % (svn_commit.symbol, source_lod, source_revnum,)
- )
- mark = self._get_source_mark(source_lod, source_revnum)
- self._set_symbol(svn_commit.symbol, mark)
- else:
- Log().debug(
- '%s will be created via a fixup branch' % (svn_commit.symbol,)
- )
-
- # Create the fixup branch (which might involve making more than
- # one commit):
- for groups in get_chunks(source_groups, self.max_merges):
- mark = self._create_commit_mark(svn_commit.symbol, svn_commit.revnum)
- self._process_symbol_commit(
- svn_commit, FIXUP_BRANCH_NAME, groups, mark
- )
-
- # Store the mark of the last commit to the fixup branch as the
- # value of the tag:
- self._set_symbol(svn_commit.symbol, mark)
- self.f.write('reset %s\n' % (FIXUP_BRANCH_NAME,))
- self.f.write('\n')
-
- self._mirror.end_commit()
-
- def cleanup(self):
- self.revision_writer.finish()
- self._mirror.close()
- self.f.close()
- del self.f
- self._symbolings_reader.close()
- del self._symbolings_reader
-
-
diff --git a/cvs2svn_lib/git_revision_recorder.py b/cvs2svn_lib/git_revision_recorder.py
deleted file mode 100644
index 604f8ac..0000000
--- a/cvs2svn_lib/git_revision_recorder.py
+++ /dev/null
@@ -1,114 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2007-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""Write file contents to a stream of git-fast-import blobs."""
-
-import itertools
-
-from cvs2svn_lib.symbol import Trunk
-from cvs2svn_lib.cvs_item import CVSRevisionDelete
-from cvs2svn_lib.cvs_item import CVSSymbol
-from cvs2svn_lib.fulltext_revision_recorder import FulltextRevisionRecorder
-from cvs2svn_lib.key_generator import KeyGenerator
-
-
-class GitRevisionRecorder(FulltextRevisionRecorder):
- """Output file revisions to git-fast-import."""
-
- def __init__(self, blob_filename):
- self.blob_filename = blob_filename
-
- def start(self):
- self.dump_file = open(self.blob_filename, 'wb')
- self._mark_generator = KeyGenerator()
-
- def start_file(self, cvs_file_items):
- self._cvs_file_items = cvs_file_items
-
- def _get_original_source(self, cvs_rev):
- """Return the original source of the contents of CVS_REV.
-
- Return the first non-delete CVSRevision with the same contents as
- CVS_REV. 'First' here refers to deltatext order; i.e., the very
- first revision is HEAD on trunk, then backwards to the root of a
- branch, then out to the tip of a branch.
-
- The candidates are all revisions along the CVS delta-dependency
- chain until the next one that has a deltatext (inclusive). Of the
- candidates, CVSRevisionDeletes are disqualified because, even
- though CVS records their contents, it is impossible to extract
- their fulltext using commands like 'cvs checkout -p'.
-
- If there is no other CVSRevision that has the same content, return
- CVS_REV itself."""
-
- # Keep track of the "best" source CVSRevision found so far:
- best_source_rev = None
-
- for cvs_rev in itertools.chain(
- [cvs_rev], self._cvs_file_items.iter_deltatext_ancestors(cvs_rev)
- ):
- if not isinstance(cvs_rev, CVSRevisionDelete):
- best_source_rev = cvs_rev
-
- if cvs_rev.deltatext_exists:
- break
-
- return best_source_rev
-
- def record_fulltext(self, cvs_rev, log, fulltext):
- """Write the fulltext to a blob if it is original and not a delete.
-
- The reason we go to this trouble is to avoid writing the same file
- contents multiple times for a string of revisions that don't have
- deltatexts (as, for example, happens with dead revisions and
- imported revisions)."""
-
- if isinstance(cvs_rev, CVSRevisionDelete):
- # There is no need to record a delete revision, and its token
- # will never be needed:
- return None
-
- source = self._get_original_source(cvs_rev)
-
- if source.id == cvs_rev.id:
- # Revision is its own source; write it out:
- mark = self._mark_generator.gen_id()
- self.dump_file.write('blob\n')
- self.dump_file.write('mark :%d\n' % (mark,))
- self.dump_file.write('data %d\n' % (len(fulltext),))
- self.dump_file.write(fulltext)
- self.dump_file.write('\n')
- return mark
- else:
- # Return as revision_recorder_token the CVSRevision.id of the
- # original source revision:
- return source.revision_recorder_token
-
- def finish_file(self, cvs_file_items):
- # Determine the original source of each CVSSymbol, and store it as
- # the symbol's revision_recorder_token.
- for cvs_item in cvs_file_items.values():
- if isinstance(cvs_item, CVSSymbol):
- cvs_source = cvs_item.get_cvs_revision_source(cvs_file_items)
- cvs_item.revision_recorder_token = cvs_source.revision_recorder_token
-
- del self._cvs_file_items
-
- def finish(self):
- self.dump_file.close()
-
-
diff --git a/cvs2svn_lib/git_run_options.py b/cvs2svn_lib/git_run_options.py
deleted file mode 100644
index 726b127..0000000
--- a/cvs2svn_lib/git_run_options.py
+++ /dev/null
@@ -1,274 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module manages cvs2git run options."""
-
-
-import sys
-import datetime
-import codecs
-
-from cvs2svn_lib.version import VERSION
-from cvs2svn_lib.common import error_prefix
-from cvs2svn_lib.common import FatalError
-from cvs2svn_lib.log import Log
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.run_options import not_both
-from cvs2svn_lib.run_options import RunOptions
-from cvs2svn_lib.run_options import ContextOption
-from cvs2svn_lib.run_options import IncompatibleOption
-from cvs2svn_lib.run_options import authors
-from cvs2svn_lib.man_writer import ManWriter
-from cvs2svn_lib.project import Project
-from cvs2svn_lib.rcs_revision_manager import RCSRevisionReader
-from cvs2svn_lib.cvs_revision_manager import CVSRevisionReader
-from cvs2svn_lib.git_revision_recorder import GitRevisionRecorder
-from cvs2svn_lib.git_output_option import GitRevisionMarkWriter
-from cvs2svn_lib.git_output_option import GitOutputOption
-from cvs2svn_lib.revision_manager import NullRevisionRecorder
-from cvs2svn_lib.revision_manager import NullRevisionExcluder
-from cvs2svn_lib.fulltext_revision_recorder \
- import SimpleFulltextRevisionRecorderAdapter
-
-
-short_desc = 'convert a cvs repository into a git repository'
-
-synopsis = """\
-.B cvs2git
-[\\fIOPTION\\fR]... \\fIOUTPUT-OPTIONS CVS-REPOS-PATH\\fR
-.br
-.B cvs2git
-[\\fIOPTION\\fR]... \\fI--options=PATH\\fR
-"""
-
-long_desc = """\
-Create a new git repository based on the version history stored in a
-CVS repository. Each CVS commit will be mirrored in the git
-repository, including such information as date of commit and id of the
-committer.
-.P
-The output of this program are a "blobfile" and a "dumpfile", which
-together can be loaded into a git repository using "git fast-import".
-.P
-\\fICVS-REPOS-PATH\\fR is the filesystem path of the part of the CVS
-repository that you want to convert. This path doesn't have to be the
-top level directory of a CVS repository; it can point at a project
-within a repository, in which case only that project will be
-converted. This path or one of its parent directories has to contain
-a subdirectory called CVSROOT (though the CVSROOT directory can be
-empty).
-.P
-It is not possible directly to convert a CVS repository to which you
-only have remote access, but the FAQ describes tools that may be used
-to create a local copy of a remote CVS repository.
-"""
-
-files = """\
-A directory called \\fIcvs2svn-tmp\\fR (or the directory specified by
-\\fB--tmpdir\\fR) is used as scratch space for temporary data files.
-"""
-
-see_also = [
- ('cvs', '1'),
- ('git', '1'),
- ('git-fast-import', '1'),
- ]
-
-
-class GitRunOptions(RunOptions):
- def __init__(self, progname, cmd_args, pass_manager):
- Ctx().cross_project_commits = False
- Ctx().cross_branch_commits = False
- RunOptions.__init__(self, progname, cmd_args, pass_manager)
-
- def _get_output_options_group(self):
- group = RunOptions._get_output_options_group(self)
-
- group.add_option(IncompatibleOption(
- '--blobfile', type='string',
- action='store',
- help='path to which the "blob" data should be written',
- man_help=(
- 'Write the "blob" data (containing revision contents) to '
- '\\fIpath\\fR.'
- ),
- metavar='PATH',
- ))
- group.add_option(IncompatibleOption(
- '--dumpfile', type='string',
- action='store',
- help='path to which the revision data should be written',
- man_help=(
- 'Write the revision data (branches and commits) to \\fIpath\\fR.'
- ),
- metavar='PATH',
- ))
- group.add_option(ContextOption(
- '--dry-run',
- action='store_true',
- help=(
- 'do not create any output; just print what would happen.'
- ),
- man_help=(
- 'Do not create any output; just print what would happen.'
- ),
- ))
-
- return group
-
- def _get_extraction_options_group(self):
- group = RunOptions._get_extraction_options_group(self)
-
- self.parser.set_default('use_cvs', False)
- group.add_option(IncompatibleOption(
- '--use-cvs',
- action='store_true',
- help=(
- 'use CVS to extract revision contents (slower than '
- '--use-rcs but more reliable) (default)'
- ),
- man_help=(
- 'Use CVS to extract revision contents. This option is slower '
- 'than \\fB--use-rcs\\fR but more reliable.'
- ),
- ))
- self.parser.set_default('use_rcs', False)
- group.add_option(IncompatibleOption(
- '--use-rcs',
- action='store_true',
- help=(
- 'use RCS to extract revision contents (faster than '
- '--use-cvs but fails in some cases)'
- ),
- man_help=(
- 'Use RCS \'co\' to extract revision contents. This option is '
- 'faster than \\fB--use-cvs\\fR but fails in some cases.'
- ),
- ))
-
- return group
-
- def callback_manpage(self, option, opt_str, value, parser):
- f = codecs.getwriter('utf_8')(sys.stdout)
- ManWriter(
- parser,
- section='1',
- date=datetime.date.today(),
- source='Version %s' % (VERSION,),
- manual='User Commands',
- short_desc=short_desc,
- synopsis=synopsis,
- long_desc=long_desc,
- files=files,
- authors=authors,
- see_also=see_also,
- ).write_manpage(f)
- sys.exit(0)
-
- def process_io_options(self):
- """Process input/output options.
-
- Process options related to extracting data from the CVS repository
- and writing to 'git fast-import'-formatted files."""
-
- ctx = Ctx()
- options = self.options
-
- not_both(options.use_rcs, '--use-rcs',
- options.use_cvs, '--use-cvs')
-
- if options.use_rcs:
- revision_reader = RCSRevisionReader(
- co_executable=options.co_executable
- )
- else:
- # --use-cvs is the default:
- revision_reader = CVSRevisionReader(
- cvs_executable=options.cvs_executable
- )
-
- if ctx.dry_run:
- ctx.revision_recorder = NullRevisionRecorder()
- else:
- if not (options.blobfile and options.dumpfile):
- raise FatalError("must pass '--blobfile' and '--dumpfile' options.")
- ctx.revision_recorder = SimpleFulltextRevisionRecorderAdapter(
- revision_reader,
- GitRevisionRecorder(options.blobfile),
- )
-
- ctx.revision_excluder = NullRevisionExcluder()
- ctx.revision_reader = None
-
- ctx.output_option = GitOutputOption(
- options.dumpfile,
- GitRevisionMarkWriter(),
- max_merges=None,
- # Optional map from CVS author names to git author names:
- author_transforms={}, # FIXME
- )
-
- def set_project(
- self,
- project_cvs_repos_path,
- symbol_transforms=None,
- symbol_strategy_rules=[],
- ):
- """Set the project to be converted.
-
- If a project had already been set, overwrite it.
-
- Most arguments are passed straight through to the Project
- constructor. SYMBOL_STRATEGY_RULES is an iterable of
- SymbolStrategyRules that will be applied to symbols in this
- project."""
-
- symbol_strategy_rules = list(symbol_strategy_rules)
-
- project = Project(
- 0,
- project_cvs_repos_path,
- symbol_transforms=symbol_transforms,
- )
-
- self.projects = [project]
- self.project_symbol_strategy_rules = [symbol_strategy_rules]
-
- def process_options(self):
- # Consistency check for options and arguments.
- if len(self.args) == 0:
- self.usage()
- sys.exit(1)
-
- if len(self.args) > 1:
- Log().error(error_prefix + ": must pass only one CVS repository.\n")
- self.usage()
- sys.exit(1)
-
- cvsroot = self.args[0]
-
- self.process_io_options()
- self.process_symbol_strategy_options()
- self.process_property_setter_options()
-
- # Create the project:
- self.set_project(
- cvsroot,
- symbol_transforms=self.options.symbol_transforms,
- symbol_strategy_rules=self.options.symbol_strategy_rules,
- )
-
-
diff --git a/cvs2svn_lib/key_generator.py b/cvs2svn_lib/key_generator.py
deleted file mode 100644
index d580d6b..0000000
--- a/cvs2svn_lib/key_generator.py
+++ /dev/null
@@ -1,45 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains the KeyGenerator class."""
-
-
-class KeyGenerator:
- """Generate a series of unique keys."""
-
- def __init__(self, first_id=1):
- """Initialize a KeyGenerator with the specified FIRST_ID.
-
- FIRST_ID should be an int or long, and the generated keys will be
- of the same type."""
-
- self._key_base = first_id
- self._last_id = None
-
- def gen_id(self):
- """Generate and return a previously-unused key, as an integer."""
-
- self._last_id = self._key_base
- self._key_base += 1
-
- return self._last_id
-
- def get_last_id(self):
- """Return the last id that was generated, as an integer."""
-
- return self._last_id
-
-
diff --git a/cvs2svn_lib/log.py b/cvs2svn_lib/log.py
deleted file mode 100644
index 798350c..0000000
--- a/cvs2svn_lib/log.py
+++ /dev/null
@@ -1,174 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains a simple logging facility for cvs2svn."""
-
-
-import sys
-import time
-import threading
-
-
-class Log:
- """A Simple logging facility.
-
- If self.log_level is DEBUG or higher, each line will be timestamped
- with the number of wall-clock seconds since the time when this
- module was first imported.
-
- If self.use_timestamps is True, each line will be timestamped with a
- human-readable clock time.
-
- The public methods of this class are thread-safe.
-
- This class is a Borg; see
- http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""
-
- # These constants represent the log levels that this class supports.
- # The increase_verbosity() and decrease_verbosity() methods rely on
- # these constants being consecutive integers:
- ERROR = -2
- WARN = -1
- QUIET = 0
- NORMAL = 1
- VERBOSE = 2
- DEBUG = 3
-
- start_time = time.time()
-
- __shared_state = {}
-
- def __init__(self):
- self.__dict__ = self.__shared_state
- if self.__dict__:
- return
-
- self.log_level = Log.NORMAL
-
- # Set this to True if you want to see timestamps on each line output.
- self.use_timestamps = False
-
- # The output file to use for errors:
- self._err = sys.stderr
-
- # The output file to use for lower-priority messages:
- self._out = sys.stdout
-
- # Lock to serialize writes to the log:
- self.lock = threading.Lock()
-
- def increase_verbosity(self):
- self.lock.acquire()
- try:
- self.log_level = min(self.log_level + 1, Log.DEBUG)
- finally:
- self.lock.release()
-
- def decrease_verbosity(self):
- self.lock.acquire()
- try:
- self.log_level = max(self.log_level - 1, Log.ERROR)
- finally:
- self.lock.release()
-
- def is_on(self, level):
- """Return True iff messages at the specified LEVEL are currently on.
-
- LEVEL should be one of the constants Log.WARN, Log.QUIET, etc."""
-
- return self.log_level >= level
-
- def _timestamp(self):
- """Return a timestamp if needed, as a string with a trailing space."""
-
- retval = []
-
- if self.log_level >= Log.DEBUG:
- retval.append('%f: ' % (time.time() - self.start_time,))
-
- if self.use_timestamps:
- retval.append(time.strftime('[%Y-%m-%d %I:%M:%S %Z] - '))
-
- return ''.join(retval)
-
- def _write(self, out, *args):
- """Write a message to OUT.
-
- If there are multiple ARGS, they will be separated by spaces. If
- there are multiple lines, they will be output one by one with the
- same timestamp prefix."""
-
- timestamp = self._timestamp()
- s = ' '.join(map(str, args))
- lines = s.split('\n')
- if lines and not lines[-1]:
- del lines[-1]
-
- self.lock.acquire()
- try:
- for s in lines:
- out.write('%s%s\n' % (timestamp, s,))
- # Ensure that log output doesn't get out-of-order with respect to
- # stderr output.
- out.flush()
- finally:
- self.lock.release()
-
- def write(self, *args):
- """Write a message to SELF._out.
-
- This is a public method to use for writing to the output log
- unconditionally."""
-
- self._write(self._out, *args)
-
- def error(self, *args):
- """Log a message at the ERROR level."""
-
- if self.is_on(Log.ERROR):
- self._write(self._err, *args)
-
- def warn(self, *args):
- """Log a message at the WARN level."""
-
- if self.is_on(Log.WARN):
- self._write(self._out, *args)
-
- def quiet(self, *args):
- """Log a message at the QUIET level."""
-
- if self.is_on(Log.QUIET):
- self._write(self._out, *args)
-
- def normal(self, *args):
- """Log a message at the NORMAL level."""
-
- if self.is_on(Log.NORMAL):
- self._write(self._out, *args)
-
- def verbose(self, *args):
- """Log a message at the VERBOSE level."""
-
- if self.is_on(Log.VERBOSE):
- self._write(self._out, *args)
-
- def debug(self, *args):
- """Log a message at the DEBUG level."""
-
- if self.is_on(Log.DEBUG):
- self._write(self._out, *args)
-
-
diff --git a/cvs2svn_lib/main.py b/cvs2svn_lib/main.py
deleted file mode 100644
index 492c49e..0000000
--- a/cvs2svn_lib/main.py
+++ /dev/null
@@ -1,117 +0,0 @@
-#!/usr/bin/env python2
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-import os
-import errno
-import gc
-
-try:
- # Try to get access to a bunch of encodings for use with --encoding.
- # See http://cjkpython.i18n.org/ for details.
- import iconv_codec
-except ImportError:
- pass
-
-from cvs2svn_lib.common import FatalError
-from cvs2svn_lib.svn_run_options import SVNRunOptions
-from cvs2svn_lib.git_run_options import GitRunOptions
-from cvs2svn_lib.bzr_run_options import BzrRunOptions
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.pass_manager import PassManager
-from cvs2svn_lib.passes import passes
-
-
-def main(progname, run_options, pass_manager):
- # Disable garbage collection, as we try not to create any circular
- # data structures:
- gc.disable()
-
- # Convenience var, so we don't have to keep instantiating this Borg.
- ctx = Ctx()
-
- # Make sure the tmp directory exists. Note that we don't check if
- # it's empty -- we want to be able to use, for example, "." to hold
- # tempfiles. But if we *did* want check if it were empty, we'd do
- # something like os.stat(ctx.tmpdir)[stat.ST_NLINK], of course :-).
- if not os.path.exists(ctx.tmpdir):
- erase_tmpdir = True
- os.mkdir(ctx.tmpdir)
- elif not os.path.isdir(ctx.tmpdir):
- raise FatalError(
- "cvs2svn tried to use '%s' for temporary files, but that path\n"
- " exists and is not a directory. Please make it be a directory,\n"
- " or specify some other directory for temporary files."
- % (ctx.tmpdir,))
- else:
- erase_tmpdir = False
-
- # But do lock the tmpdir, to avoid process clash.
- try:
- os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
- except OSError, e:
- if e.errno == errno.EACCES:
- raise FatalError("Permission denied:"
- + " No write access to directory '%s'." % ctx.tmpdir)
- if e.errno == errno.EEXIST:
- raise FatalError(
- "cvs2svn is using directory '%s' for temporary files, but\n"
- " subdirectory '%s/cvs2svn.lock' exists, indicating that another\n"
- " cvs2svn process is currently using '%s' as its temporary\n"
- " workspace. If you are certain that is not the case,\n"
- " then remove the '%s/cvs2svn.lock' subdirectory."
- % (ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir,))
- raise
-
- try:
- if run_options.profiling:
- import hotshot
- prof = hotshot.Profile('cvs2svn.hotshot')
- prof.runcall(pass_manager.run, run_options)
- prof.close()
- else:
- pass_manager.run(run_options)
- finally:
- try:
- os.rmdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
- except:
- pass
-
- if erase_tmpdir:
- try:
- os.rmdir(ctx.tmpdir)
- except:
- pass
-
-
-def svn_main(progname, cmd_args):
- pass_manager = PassManager(passes)
- run_options = SVNRunOptions(progname, cmd_args, pass_manager)
- main(progname, run_options, pass_manager)
-
-
-def git_main(progname, cmd_args):
- pass_manager = PassManager(passes)
- run_options = GitRunOptions(progname, cmd_args, pass_manager)
- main(progname, run_options, pass_manager)
-
-
-def bzr_main(progname, cmd_args):
- pass_manager = PassManager(passes)
- run_options = BzrRunOptions(progname, cmd_args, pass_manager)
- main(progname, run_options, pass_manager)
-
-
diff --git a/cvs2svn_lib/man_writer.py b/cvs2svn_lib/man_writer.py
deleted file mode 100644
index 3cca8c9..0000000
--- a/cvs2svn_lib/man_writer.py
+++ /dev/null
@@ -1,197 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains the ManWriter class for outputting manpages."""
-
-
-import datetime
-import optparse
-import re
-
-
-whitespace_re = re.compile(r'\s+')
-
-def wrap(s, width=70):
- # Convert all whitespace substrings to single spaces:
- s = whitespace_re.sub(' ', s)
- s = s.strip()
- retval = []
- while s:
- if len(s) <= width:
- retval.append(s)
- break
- i = s.rfind(' ', 0, width + 1)
- if i == -1:
- # There were no spaces within the first width+1 characters; break
- # at the next space after width:
- i = s.find(' ', width + 1)
- if i == -1:
- # There were no spaces in s at all.
- retval.append(s)
- break
-
- retval.append(s[:i].rstrip())
- s = s[i+1:].lstrip()
-
- for (i,line) in enumerate(retval):
- if line.startswith('\'') or line.startswith('.'):
- # These are roff control characters and have to be escaped:
- retval[i] = '\\' + line
-
- return '\n'.join(retval)
-
-
-class ManOption(optparse.Option):
- """An optparse.Option that holds an explicit string for the man page."""
-
- def __init__(self, *args, **kw):
- self.man_help = kw.pop('man_help')
- optparse.Option.__init__(self, *args, **kw)
-
-
-class ManWriter(object):
- def __init__(
- self,
- parser,
- section, date, source, manual,
- short_desc, synopsis, long_desc, files, authors, see_also,
- ):
- self.parser = parser
- self.section = section
- self.date = date
- self.source = source
- self.manual = manual
- self.short_desc = short_desc
- self.synopsis = synopsis
- self.long_desc = long_desc
- self.files = files
- self.authors = authors
- self.see_also = see_also
-
- def write_title(self, f):
- f.write('.\\" Process this file with\n')
- f.write(
- '.\\" groff -man -Tascii %s.%s\n' % (
- self.parser.get_prog_name(),
- self.section,
- )
- )
- f.write(
- '.TH %s "%s" "%s" "%s" "%s"\n' % (
- self.parser.get_prog_name().upper(),
- self.section,
- self.date.strftime('%b %d, %Y'),
- self.source,
- self.manual,
- )
- )
-
- def write_name(self, f):
- f.write('.SH "NAME"\n')
- f.write(
- '%s \- %s\n' % (
- self.parser.get_prog_name(),
- self.short_desc,
- )
- )
-
- def write_synopsis(self, f):
- f.write('.SH "SYNOPSIS"\n')
- f.write(self.synopsis)
-
- def write_description(self, f):
- f.write('.SH "DESCRIPTION"\n')
- f.write(self.long_desc)
-
- def _get_option_strings(self, option):
- """Return a list of option strings formatted with their metavariables.
-
- This method is very similar to
- optparse.HelpFormatter.format_option_strings().
-
- """
-
- if option.takes_value():
- metavar = (option.metavar or option.dest).lower()
- short_opts = [
- '\\fB%s\\fR \\fI%s\\fR' % (opt, metavar)
- for opt in option._short_opts
- ]
- long_opts = [
- '\\fB%s\\fR=\\fI%s\\fR' % (opt, metavar)
- for opt in option._long_opts
- ]
- else:
- short_opts = [
- '\\fB%s\\fR' % (opt,)
- for opt in option._short_opts
- ]
- long_opts = [
- '\\fB%s\\fR' % (opt,)
- for opt in option._long_opts
- ]
-
- return short_opts + long_opts
-
- def _write_option(self, f, option):
- man_help = getattr(option, 'man_help', option.help)
-
- if man_help is not optparse.SUPPRESS_HELP:
- man_help = wrap(man_help)
- f.write('.IP "%s"\n' % (', '.join(self._get_option_strings(option)),))
- f.write('%s\n' % (man_help,))
-
- def _write_container_help(self, f, container):
- for option in container.option_list:
- if option.help is not optparse.SUPPRESS_HELP:
- self._write_option(f, option)
-
- def write_options(self, f):
- f.write('.SH "OPTIONS"\n')
- if self.parser.option_list:
- (self._write_container_help(f, self.parser))
- for group in self.parser.option_groups:
- f.write('.SH "%s"\n' % (group.title.upper(),))
- if group.description:
- f.write(self.format_description(group.description) + '\n')
- self._write_container_help(f, group)
-
- def write_files(self, f):
- f.write('.SH "FILES"\n')
- f.write(self.files)
-
- def write_authors(self, f):
- f.write('.SH "AUTHORS"\n')
- f.write(self.authors)
-
- def write_see_also(self, f):
- f.write('.SH "SEE ALSO"\n')
- f.write(', '.join([
- '%s(%s)' % (name, section,)
- for (name, section,) in self.see_also
- ]) + '\n')
-
- def write_manpage(self, f):
- self.write_title(f)
- self.write_name(f)
- self.write_synopsis(f)
- self.write_description(f)
- self.write_options(f)
- self.write_files(f)
- self.write_authors(f)
- self.write_see_also(f)
-
-
diff --git a/cvs2svn_lib/metadata.py b/cvs2svn_lib/metadata.py
deleted file mode 100644
index 6cd1337..0000000
--- a/cvs2svn_lib/metadata.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""Represent CVSRevision metadata."""
-
-
-class Metadata(object):
- def __init__(self, id, author, log_msg):
- self.id = id
- self.author = author
- self.log_msg = log_msg
-
-
diff --git a/cvs2svn_lib/metadata_database.py b/cvs2svn_lib/metadata_database.py
deleted file mode 100644
index de01920..0000000
--- a/cvs2svn_lib/metadata_database.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains classes to manage CVSRevision metadata."""
-
-
-try:
- from hashlib import sha1
-except ImportError:
- from sha import new as sha1
-
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.database import IndexedDatabase
-from cvs2svn_lib.key_generator import KeyGenerator
-from cvs2svn_lib.serializer import PrimedPickleSerializer
-from cvs2svn_lib.metadata import Metadata
-
-
-def MetadataDatabase(store_filename, index_table_filename, mode):
- """A database to store Metadata instances that describe CVSRevisions.
-
- This database manages a map
-
- id -> Metadata instance
-
- where id is a unique identifier for the metadata."""
-
- return IndexedDatabase(
- store_filename, index_table_filename,
- mode, PrimedPickleSerializer((Metadata,)),
- )
-
-
-class MetadataLogger:
- """Store and generate IDs for the metadata associated with CVSRevisions.
-
- We want CVSRevisions that might be able to be combined to have the
- same metadata ID, so we want a one-to-one relationship id <->
- metadata. We could simply construct a map {metadata : id}, but the
- map would grow too large. Therefore, we generate a digest
- containing the significant parts of the metadata, and construct a
- map {digest : id}.
-
- To get the ID for a new set of metadata, we first create the digest.
- If there is already an ID registered for that digest, we simply
- return it. If not, we generate a new ID, store the metadata in the
- metadata database under that ID, record the mapping {digest : id},
- and return the new id.
-
- What metadata is included in the digest? The author, log_msg,
- project_id (if Ctx().cross_project_commits is not set), and
- branch_name (if Ctx().cross_branch_commits is not set)."""
-
- def __init__(self, metadata_db):
- self._metadata_db = metadata_db
-
- # A map { digest : id }:
- self._digest_to_id = {}
-
- # A key_generator to generate keys for metadata that haven't been
- # seen yet:
- self.key_generator = KeyGenerator()
-
- def store(self, project, branch_name, author, log_msg):
- """Store the metadata and return its id.
-
- Locate the record for a commit with the specified (PROJECT,
- BRANCH_NAME, AUTHOR, LOG_MSG) and return its id. (Depending on
- policy, not all of these items are necessarily used when creating
- the unique id.) If there is no such record, create one and return
- its newly-generated id."""
-
- key = [author, log_msg]
- if not Ctx().cross_project_commits:
- key.append('%x' % project.id)
- if not Ctx().cross_branch_commits:
- key.append(branch_name or '')
-
- digest = sha1('\0'.join(key)).digest()
- try:
- # See if it is already known:
- return self._digest_to_id[digest]
- except KeyError:
- id = self.key_generator.gen_id()
- self._digest_to_id[digest] = id
- self._metadata_db[id] = Metadata(id, author, log_msg)
- return id
-
-
diff --git a/cvs2svn_lib/openings_closings.py b/cvs2svn_lib/openings_closings.py
deleted file mode 100644
index b1d4093..0000000
--- a/cvs2svn_lib/openings_closings.py
+++ /dev/null
@@ -1,236 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains classes to keep track of symbol openings/closings."""
-
-
-import cPickle
-
-from cvs2svn_lib import config
-from cvs2svn_lib.common import InternalError
-from cvs2svn_lib.artifact_manager import artifact_manager
-from cvs2svn_lib.svn_revision_range import SVNRevisionRange
-
-
-# Constants used in SYMBOL_OPENINGS_CLOSINGS
-OPENING = 'O'
-CLOSING = 'C'
-
-
-class SymbolingsLogger:
- """Manage the file that contains lines for symbol openings and closings.
-
- This data will later be used to determine valid SVNRevision ranges
- from which a file can be copied when creating a branch or tag in
- Subversion. Do this by finding 'Openings' and 'Closings' for each
- file copied onto a branch or tag.
-
- An 'Opening' is the beginning of the lifetime of the source
- (CVSRevision or CVSBranch) from which a given CVSSymbol sprouts.
-
- The 'Closing' is the SVN revision when the source is deleted or
- overwritten.
-
- For example, on file 'foo.c', branch BEE has branch number 1.2.2 and
- obviously sprouts from revision 1.2. Therefore, the SVN revision
- when 1.2 is committed is the opening for BEE on path 'foo.c', and
- the SVN revision when 1.3 is committed is the closing for BEE on
- path 'foo.c'. Note that there may be many revisions chronologically
- between 1.2 and 1.3, for example, revisions on branches of 'foo.c',
- perhaps even including on branch BEE itself. But 1.3 is the next
- revision *on the same line* as 1.2, that is why it is the closing
- revision for those symbolic names of which 1.2 is the opening.
-
- The reason for doing all this hullabaloo is (1) to determine what
- range of SVN revision numbers can be used as the source of a copy of
- a particular file onto a branch/tag, and (2) to minimize the number
- of copies and deletes per creation by choosing source SVN revision
- numbers that can be used for as many files as possible.
-
- For example, revisions 1.2 and 1.3 of foo.c might correspond to
- revisions 17 and 30 in Subversion. That means that when creating
- branch BEE, foo.c has to be copied from a Subversion revision number
- in the range 17 <= revnum < 30. Now if there were another file,
- 'bar.c', in the same directory, and 'bar.c's opening and closing for
- BEE correspond to revisions 24 and 39 in Subversion, then we can
- kill two birds with one stone by copying the whole directory from
- somewhere in the range 24 <= revnum < 30."""
-
- def __init__(self):
- self.symbolings = open(
- artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS), 'w')
-
- def log_revision(self, cvs_rev, svn_revnum):
- """Log any openings and closings found in CVS_REV."""
-
- for (symbol_id, cvs_symbol_id,) in cvs_rev.opened_symbols:
- self._log_opening(symbol_id, cvs_symbol_id, svn_revnum)
-
- for (symbol_id, cvs_symbol_id) in cvs_rev.closed_symbols:
- self._log_closing(symbol_id, cvs_symbol_id, svn_revnum)
-
- def log_branch_revision(self, cvs_branch, svn_revnum):
- """Log any openings and closings found in CVS_BRANCH."""
-
- for (symbol_id, cvs_symbol_id,) in cvs_branch.opened_symbols:
- self._log_opening(symbol_id, cvs_symbol_id, svn_revnum)
-
- def _log(self, symbol_id, cvs_symbol_id, svn_revnum, type):
- """Log an opening or closing to self.symbolings.
-
- Write out a single line to the symbol_openings_closings file
- representing that SVN_REVNUM is either the opening or closing
- (TYPE) of CVS_SYMBOL_ID for SYMBOL_ID.
-
- TYPE should be one of the following constants: OPENING or CLOSING."""
-
- self.symbolings.write(
- '%x %d %s %x\n' % (symbol_id, svn_revnum, type, cvs_symbol_id)
- )
-
- def _log_opening(self, symbol_id, cvs_symbol_id, svn_revnum):
- """Log an opening to self.symbolings.
-
- See _log() for more information."""
-
- self._log(symbol_id, cvs_symbol_id, svn_revnum, OPENING)
-
- def _log_closing(self, symbol_id, cvs_symbol_id, svn_revnum):
- """Log a closing to self.symbolings.
-
- See _log() for more information."""
-
- self._log(symbol_id, cvs_symbol_id, svn_revnum, CLOSING)
-
- def close(self):
- self.symbolings.close()
- self.symbolings = None
-
-
-class SymbolingsReader:
- """Provides an interface to retrieve symbol openings and closings.
-
- This class accesses the SYMBOL_OPENINGS_CLOSINGS_SORTED file and the
- SYMBOL_OFFSETS_DB. Does the heavy lifting of finding and returning
- the correct opening and closing Subversion revision numbers for a
- given symbolic name and SVN revision number range."""
-
- def __init__(self):
- """Opens the SYMBOL_OPENINGS_CLOSINGS_SORTED for reading, and
- reads the offsets database into memory."""
-
- self.symbolings = open(
- artifact_manager.get_temp_file(
- config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
- 'r')
- # The offsets_db is really small, and we need to read and write
- # from it a fair bit, so suck it into memory
- offsets_db = file(
- artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'rb')
- # A map from symbol_id to offset. The values of this map are
- # incremented as the openings and closings for a symbol are
- # consumed.
- self.offsets = cPickle.load(offsets_db)
- offsets_db.close()
-
- def close(self):
- self.symbolings.close()
- del self.symbolings
- del self.offsets
-
- def _generate_lines(self, symbol):
- """Generate the lines for SYMBOL.
-
- SYMBOL is a TypedSymbol instance. Yield the tuple (revnum, type,
- cvs_symbol_id) for all openings and closings for SYMBOL."""
-
- if symbol.id in self.offsets:
- # Set our read offset for self.symbolings to the offset for this
- # symbol:
- self.symbolings.seek(self.offsets[symbol.id])
-
- while True:
- line = self.symbolings.readline().rstrip()
- if not line:
- break
- (id, revnum, type, cvs_symbol_id) = line.split()
- id = int(id, 16)
- revnum = int(revnum)
- if id != symbol.id:
- break
- cvs_symbol_id = int(cvs_symbol_id, 16)
-
- yield (revnum, type, cvs_symbol_id)
-
- def get_range_map(self, svn_symbol_commit):
- """Return the ranges of all CVSSymbols in SVN_SYMBOL_COMMIT.
-
- Return a map { CVSSymbol : SVNRevisionRange }."""
-
- # A map { cvs_symbol_id : CVSSymbol }:
- cvs_symbol_map = {}
- for cvs_symbol in svn_symbol_commit.get_cvs_items():
- cvs_symbol_map[cvs_symbol.id] = cvs_symbol
-
- range_map = {}
-
- for (revnum, type, cvs_symbol_id) \
- in self._generate_lines(svn_symbol_commit.symbol):
- cvs_symbol = cvs_symbol_map.get(cvs_symbol_id)
- if cvs_symbol is None:
- # This CVSSymbol is not part of SVN_SYMBOL_COMMIT.
- continue
- range = range_map.get(cvs_symbol)
- if type == OPENING:
- if range is not None:
- raise InternalError(
- 'Multiple openings logged for %r' % (cvs_symbol,)
- )
- range_map[cvs_symbol] = SVNRevisionRange(
- cvs_symbol.source_lod, revnum
- )
- else:
- if range is None:
- raise InternalError(
- 'Closing precedes opening for %r' % (cvs_symbol,)
- )
- if range.closing_revnum is not None:
- raise InternalError(
- 'Multiple closings logged for %r' % (cvs_symbol,)
- )
- range.add_closing(revnum)
-
- # Make sure that all CVSSymbols are accounted for, and adjust the
- # closings to be not later than svn_symbol_commit.revnum.
- for cvs_symbol in cvs_symbol_map.itervalues():
- try:
- range = range_map[cvs_symbol]
- except KeyError:
- raise InternalError('No opening for %s' % (cvs_symbol,))
-
- if range.opening_revnum >= svn_symbol_commit.revnum:
- raise InternalError(
- 'Opening in r%d not ready for %s in r%d'
- % (range.opening_revnum, cvs_symbol, svn_symbol_commit.revnum,)
- )
-
- if range.closing_revnum is not None \
- and range.closing_revnum > svn_symbol_commit.revnum:
- range.closing_revnum = None
-
- return range_map
-
-
diff --git a/cvs2svn_lib/output_option.py b/cvs2svn_lib/output_option.py
deleted file mode 100644
index 70419e6..0000000
--- a/cvs2svn_lib/output_option.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains classes that hold the cvs2svn output options."""
-
-
-class OutputOption:
- """Represents an output choice for a run of cvs2svn."""
-
- def register_artifacts(self, which_pass):
- """Register artifacts that will be needed for this output option.
-
- WHICH_PASS is the pass that will call our callbacks, so it should
- be used to do the registering (e.g., call
- WHICH_PASS.register_temp_file() and/or
- WHICH_PASS.register_temp_file_needed())."""
-
- pass
-
- def check(self):
- """Check that the options stored in SELF are sensible.
-
- This might including the existence of a repository on disk, etc."""
-
- raise NotImplementedError()
-
- def check_symbols(self, symbol_map):
- """Check that the symbols in SYMBOL_MAP are OK for this output option.
-
- SYMBOL_MAP is a map {AbstractSymbol : (Trunk|TypedSymbol)},
- indicating how each symbol is planned to be converted. Raise a
- FatalError if the symbol plan is not acceptable for this output
- option."""
-
- raise NotImplementedError()
-
- def setup(self, svn_rev_count):
- """Prepare this output option."""
-
- raise NotImplementedError()
-
- def process_initial_project_commit(self, svn_commit):
- """Process SVN_COMMIT, which is an SVNInitialProjectCommit."""
-
- raise NotImplementedError()
-
- def process_primary_commit(self, svn_commit):
- """Process SVN_COMMIT, which is an SVNPrimaryCommit."""
-
- raise NotImplementedError()
-
- def process_post_commit(self, svn_commit):
- """Process SVN_COMMIT, which is an SVNPostCommit."""
-
- raise NotImplementedError()
-
- def process_branch_commit(self, svn_commit):
- """Process SVN_COMMIT, which is an SVNBranchCommit."""
-
- raise NotImplementedError()
-
- def process_tag_commit(self, svn_commit):
- """Process SVN_COMMIT, which is an SVNTagCommit."""
-
- raise NotImplementedError()
-
- def cleanup(self):
- """Perform any required cleanup related to this output option."""
-
- raise NotImplementedError()
-
-
diff --git a/cvs2svn_lib/pass_manager.py b/cvs2svn_lib/pass_manager.py
deleted file mode 100644
index 90fa2dc..0000000
--- a/cvs2svn_lib/pass_manager.py
+++ /dev/null
@@ -1,215 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains tools to manage the passes of a conversion."""
-
-
-import time
-import gc
-
-from cvs2svn_lib import config
-from cvs2svn_lib.common import FatalError
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.log import Log
-from cvs2svn_lib.stats_keeper import StatsKeeper
-from cvs2svn_lib.stats_keeper import read_stats_keeper
-from cvs2svn_lib.artifact_manager import artifact_manager
-
-
-class InvalidPassError(FatalError):
- def __init__(self, msg):
- FatalError.__init__(
- self, msg + '\nUse --help-passes for more information.')
-
-
-def check_for_garbage():
- # We've turned off the garbage collector because we shouldn't
- # need it (we don't create circular dependencies) and because it
- # is therefore a waste of time. So here we check for any
- # unreachable objects and generate a debug-level warning if any
- # occur:
- gc.set_debug(gc.DEBUG_SAVEALL)
- gc_count = gc.collect()
- if gc_count:
- if Log().is_on(Log.DEBUG):
- Log().debug(
- 'INTERNAL: %d unreachable object(s) were garbage collected:'
- % (gc_count,)
- )
- for g in gc.garbage:
- Log().debug(' %s' % (g,))
- del gc.garbage[:]
-
-
-class Pass(object):
- """Base class for one step of the conversion."""
-
- def __init__(self):
- # By default, use the pass object's class name as the pass name:
- self.name = self.__class__.__name__
-
- def register_artifacts(self):
- """Register artifacts (created and needed) in artifact_manager."""
-
- raise NotImplementedError
-
- def _register_temp_file(self, basename):
- """Helper method; for brevity only."""
-
- artifact_manager.register_temp_file(basename, self)
-
- def _register_temp_file_needed(self, basename):
- """Helper method; for brevity only."""
-
- artifact_manager.register_temp_file_needed(basename, self)
-
- def run(self, run_options, stats_keeper):
- """Carry out this step of the conversion.
-
- RUN_OPTIONS is an instance of RunOptions. STATS_KEEPER is an
- instance of StatsKeeper."""
-
- raise NotImplementedError
-
-
-class PassManager:
- """Manage a list of passes that can be executed separately or all at once.
-
- Passes are numbered starting with 1."""
-
- def __init__(self, passes):
- """Construct a PassManager with the specified PASSES.
-
- Internally, passes are numbered starting with 1. So PASSES[0] is
- considered to be pass number 1."""
-
- self.passes = passes
- self.num_passes = len(self.passes)
-
- def get_pass_number(self, pass_name, default=None):
- """Return the number of the pass indicated by PASS_NAME.
-
- PASS_NAME should be a string containing the name or number of a
- pass. If a number, it should be in the range 1 <= value <=
- self.num_passes. Return an integer in the same range. If
- PASS_NAME is the empty string and DEFAULT is specified, return
- DEFAULT. Raise InvalidPassError if PASS_NAME cannot be converted
- into a valid pass number."""
-
- if not pass_name and default is not None:
- assert 1 <= default <= self.num_passes
- return default
-
- try:
- # Does pass_name look like an integer?
- pass_number = int(pass_name)
- if not 1 <= pass_number <= self.num_passes:
- raise InvalidPassError(
- 'illegal value (%d) for pass number. Must be 1 through %d or\n'
- 'the name of a known pass.'
- % (pass_number,self.num_passes,))
- return pass_number
- except ValueError:
- # Is pass_name the name of one of the passes?
- for (i, the_pass) in enumerate(self.passes):
- if the_pass.name == pass_name:
- return i + 1
- raise InvalidPassError('Unknown pass name (%r).' % (pass_name,))
-
- def run(self, run_options):
- """Run the specified passes, one after another.
-
- RUN_OPTIONS will be passed to the Passes' run() methods.
- RUN_OPTIONS.start_pass is the number of the first pass that should
- be run. RUN_OPTIONS.end_pass is the number of the last pass that
- should be run. It must be that 1 <= RUN_OPTIONS.start_pass <=
- RUN_OPTIONS.end_pass <= self.num_passes."""
-
- # Convert start_pass and end_pass into the indices of the passes
- # to execute, using the Python index range convention (i.e., first
- # pass executed and first pass *after* the ones that should be
- # executed).
- index_start = run_options.start_pass - 1
- index_end = run_options.end_pass
-
- # Inform the artifact manager when artifacts are created and used:
- for (i, the_pass) in enumerate(self.passes):
- the_pass.register_artifacts()
- # Each pass creates a new version of the statistics file:
- artifact_manager.register_temp_file(
- config.STATISTICS_FILE % (i + 1,), the_pass
- )
- if i != 0:
- # Each pass subsequent to the first reads the statistics file
- # from the preceding pass:
- artifact_manager.register_temp_file_needed(
- config.STATISTICS_FILE % (i + 1 - 1,), the_pass
- )
-
- # Tell the artifact manager about passes that are being skipped this run:
- for the_pass in self.passes[0:index_start]:
- artifact_manager.pass_skipped(the_pass)
-
- start_time = time.time()
- for i in range(index_start, index_end):
- the_pass = self.passes[i]
- Log().quiet('----- pass %d (%s) -----' % (i + 1, the_pass.name,))
- artifact_manager.pass_started(the_pass)
-
- if i == 0:
- stats_keeper = StatsKeeper()
- else:
- stats_keeper = read_stats_keeper(
- artifact_manager.get_temp_file(
- config.STATISTICS_FILE % (i + 1 - 1,)
- )
- )
-
- the_pass.run(run_options, stats_keeper)
- end_time = time.time()
- stats_keeper.log_duration_for_pass(
- end_time - start_time, i + 1, the_pass.name
- )
- Log().normal(stats_keeper.single_pass_timing(i + 1))
- stats_keeper.archive(
- artifact_manager.get_temp_file(config.STATISTICS_FILE % (i + 1,))
- )
- start_time = end_time
- Ctx().clean()
- # Allow the artifact manager to clean up artifacts that are no
- # longer needed:
- artifact_manager.pass_done(the_pass, Ctx().skip_cleanup)
-
- check_for_garbage()
-
- # Tell the artifact manager about passes that are being deferred:
- for the_pass in self.passes[index_end:]:
- artifact_manager.pass_deferred(the_pass)
-
- Log().quiet(stats_keeper)
- Log().normal(stats_keeper.timings())
-
- # Consistency check:
- artifact_manager.check_clean()
-
- def help_passes(self):
- """Output (to sys.stdout) the indices and names of available passes."""
-
- print 'PASSES:'
- for (i, the_pass) in enumerate(self.passes):
- print '%5d : %s' % (i + 1, the_pass.name,)
-
-
diff --git a/cvs2svn_lib/passes.py b/cvs2svn_lib/passes.py
deleted file mode 100644
index af14692..0000000
--- a/cvs2svn_lib/passes.py
+++ /dev/null
@@ -1,1837 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module defines the passes that make up a conversion."""
-
-
-import sys
-import os
-import shutil
-import cPickle
-
-from cvs2svn_lib import config
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.common import warning_prefix
-from cvs2svn_lib.common import FatalException
-from cvs2svn_lib.common import FatalError
-from cvs2svn_lib.common import InternalError
-from cvs2svn_lib.common import DB_OPEN_NEW
-from cvs2svn_lib.common import DB_OPEN_READ
-from cvs2svn_lib.common import DB_OPEN_WRITE
-from cvs2svn_lib.common import Timestamper
-from cvs2svn_lib.log import Log
-from cvs2svn_lib.pass_manager import Pass
-from cvs2svn_lib.serializer import PrimedPickleSerializer
-from cvs2svn_lib.artifact_manager import artifact_manager
-from cvs2svn_lib.cvs_file_database import CVSFileDatabase
-from cvs2svn_lib.metadata_database import MetadataDatabase
-from cvs2svn_lib.project import read_projects
-from cvs2svn_lib.project import write_projects
-from cvs2svn_lib.symbol import LineOfDevelopment
-from cvs2svn_lib.symbol import Trunk
-from cvs2svn_lib.symbol import Symbol
-from cvs2svn_lib.symbol import Branch
-from cvs2svn_lib.symbol import Tag
-from cvs2svn_lib.symbol import ExcludedSymbol
-from cvs2svn_lib.symbol_database import SymbolDatabase
-from cvs2svn_lib.symbol_database import create_symbol_database
-from cvs2svn_lib.symbol_statistics import SymbolPlanError
-from cvs2svn_lib.symbol_statistics import IndeterminateSymbolException
-from cvs2svn_lib.symbol_statistics import SymbolStatistics
-from cvs2svn_lib.cvs_item import CVSRevision
-from cvs2svn_lib.cvs_item import CVSSymbol
-from cvs2svn_lib.cvs_item_database import OldCVSItemStore
-from cvs2svn_lib.cvs_item_database import IndexedCVSItemStore
-from cvs2svn_lib.cvs_item_database import cvs_item_primer
-from cvs2svn_lib.cvs_item_database import NewSortableCVSRevisionDatabase
-from cvs2svn_lib.cvs_item_database import OldSortableCVSRevisionDatabase
-from cvs2svn_lib.cvs_item_database import NewSortableCVSSymbolDatabase
-from cvs2svn_lib.cvs_item_database import OldSortableCVSSymbolDatabase
-from cvs2svn_lib.key_generator import KeyGenerator
-from cvs2svn_lib.changeset import RevisionChangeset
-from cvs2svn_lib.changeset import OrderedChangeset
-from cvs2svn_lib.changeset import SymbolChangeset
-from cvs2svn_lib.changeset import BranchChangeset
-from cvs2svn_lib.changeset import create_symbol_changeset
-from cvs2svn_lib.changeset_graph import ChangesetGraph
-from cvs2svn_lib.changeset_graph_link import ChangesetGraphLink
-from cvs2svn_lib.changeset_database import ChangesetDatabase
-from cvs2svn_lib.changeset_database import CVSItemToChangesetTable
-from cvs2svn_lib.svn_commit import SVNRevisionCommit
-from cvs2svn_lib.openings_closings import SymbolingsLogger
-from cvs2svn_lib.svn_commit_creator import SVNCommitCreator
-from cvs2svn_lib.persistence_manager import PersistenceManager
-from cvs2svn_lib.collect_data import CollectData
-from cvs2svn_lib.process import call_command
-from cvs2svn_lib.check_dependencies_pass \
- import CheckItemStoreDependenciesPass
-from cvs2svn_lib.check_dependencies_pass \
- import CheckIndexedItemStoreDependenciesPass
-
-
-def sort_file(infilename, outfilename, options=[]):
- """Sort file INFILENAME, storing the results to OUTFILENAME.
-
- OPTIONS is an optional list of strings that are passed as additional
- options to the sort command."""
-
- # GNU sort will sort our dates differently (incorrectly!) if our
- # LC_ALL is anything but 'C', so if LC_ALL is set, temporarily set
- # it to 'C'
- lc_all_tmp = os.environ.get('LC_ALL', None)
- os.environ['LC_ALL'] = 'C'
-
- # The -T option to sort has a nice side effect. The Win32 sort is
- # case insensitive and cannot be used, and since it does not
- # understand the -T option and dies if we try to use it, there is no
- # risk that we use that sort by accident.
- command = [
- Ctx().sort_executable,
- '-T', Ctx().tmpdir
- ] + options + [
- infilename
- ]
-
- try:
- # Under Windows, the subprocess module uses the Win32
- # CreateProcess, which always looks in the Windows system32
- # directory before it looks in the directories listed in the PATH
- # environment variable. Since the Windows sort.exe is in the
- # system32 directory it will always be chosen. A simple
- # workaround is to launch the sort in a shell. When the shell
- # (cmd.exe) searches it only examines the directories in the PATH
- # so putting the directory with GNU sort ahead of the Windows
- # system32 directory will cause GNU sort to be chosen.
- call_command(
- command, stdout=open(outfilename, 'w'), shell=(sys.platform=='win32')
- )
- finally:
- if lc_all_tmp is None:
- del os.environ['LC_ALL']
- else:
- os.environ['LC_ALL'] = lc_all_tmp
-
- # On some versions of Windows, os.system() does not return an error
- # if the command fails. So add little consistency tests here that
- # the output file was created and has the right size:
-
- if not os.path.exists(outfilename):
- raise FatalError('Sort output file missing: %r' % (outfilename,))
-
- if os.path.getsize(outfilename) != os.path.getsize(infilename):
- raise FatalError(
- 'Sort input and output file sizes differ:\n'
- ' %r (%d bytes)\n'
- ' %r (%d bytes)' % (
- infilename, os.path.getsize(infilename),
- outfilename, os.path.getsize(outfilename),
- )
- )
-
-
-class CollectRevsPass(Pass):
- """This pass was formerly known as pass1."""
-
- def register_artifacts(self):
- self._register_temp_file(config.PROJECTS)
- self._register_temp_file(config.SYMBOL_STATISTICS)
- self._register_temp_file(config.METADATA_INDEX_TABLE)
- self._register_temp_file(config.METADATA_STORE)
- self._register_temp_file(config.CVS_FILES_DB)
- self._register_temp_file(config.CVS_ITEMS_STORE)
- Ctx().revision_recorder.register_artifacts(self)
-
- def run(self, run_options, stats_keeper):
- Log().quiet("Examining all CVS ',v' files...")
- Ctx()._projects = {}
- Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_NEW)
- cd = CollectData(Ctx().revision_recorder, stats_keeper)
- for project in run_options.projects:
- cd.process_project(project)
- run_options.projects = None
-
- fatal_errors = cd.close()
-
- if fatal_errors:
- raise FatalException("Pass 1 complete.\n"
- + "=" * 75 + "\n"
- + "Error summary:\n"
- + "\n".join(fatal_errors) + "\n"
- + "Exited due to fatal error(s).")
-
- Ctx()._cvs_file_db.close()
- write_projects(artifact_manager.get_temp_file(config.PROJECTS))
- Log().quiet("Done")
-
-
-class CleanMetadataPass(Pass):
- """Clean up CVS revision metadata and write it to a new database."""
-
- def register_artifacts(self):
- self._register_temp_file(config.METADATA_CLEAN_INDEX_TABLE)
- self._register_temp_file(config.METADATA_CLEAN_STORE)
- self._register_temp_file_needed(config.METADATA_INDEX_TABLE)
- self._register_temp_file_needed(config.METADATA_STORE)
-
- def _get_clean_author(self, author):
- """Return AUTHOR, converted appropriately to UTF8.
-
- Raise a UnicodeException if it cannot be converted using the
- configured cvs_author_decoder."""
-
- try:
- return self._authors[author]
- except KeyError:
- pass
-
- try:
- clean_author = Ctx().cvs_author_decoder(author)
- except UnicodeError:
- self._authors[author] = author
- raise UnicodeError('Problem decoding author \'%s\'' % (author,))
-
- try:
- clean_author = clean_author.encode('utf8')
- except UnicodeError:
- self._authors[author] = author
- raise UnicodeError('Problem encoding author \'%s\'' % (author,))
-
- self._authors[author] = clean_author
- return clean_author
-
- def _get_clean_log_msg(self, log_msg):
- """Return LOG_MSG, converted appropriately to UTF8.
-
- Raise a UnicodeException if it cannot be converted using the
- configured cvs_log_decoder."""
-
- try:
- clean_log_msg = Ctx().cvs_log_decoder(log_msg)
- except UnicodeError:
- raise UnicodeError(
- 'Problem decoding log message:\n'
- '%s\n'
- '%s\n'
- '%s'
- % ('-' * 75, log_msg, '-' * 75,)
- )
-
- try:
- return clean_log_msg.encode('utf8')
- except UnicodeError:
- raise UnicodeError(
- 'Problem encoding log message:\n'
- '%s\n'
- '%s\n'
- '%s'
- % ('-' * 75, log_msg, '-' * 75,)
- )
-
- def _clean_metadata(self, metadata):
- """Clean up METADATA by overwriting its members as necessary."""
-
- try:
- metadata.author = self._get_clean_author(metadata.author)
- except UnicodeError, e:
- Log().warn('%s: %s' % (warning_prefix, e,))
- self.warnings = True
-
- try:
- metadata.log_msg = self._get_clean_log_msg(metadata.log_msg)
- except UnicodeError, e:
- Log().warn('%s: %s' % (warning_prefix, e,))
- self.warnings = True
-
- def run(self, run_options, stats_keeper):
- Log().quiet("Converting metadata to UTF8...")
- metadata_db = MetadataDatabase(
- artifact_manager.get_temp_file(config.METADATA_STORE),
- artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE),
- DB_OPEN_READ,
- )
- metadata_clean_db = MetadataDatabase(
- artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
- artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
- DB_OPEN_NEW,
- )
-
- self.warnings = False
-
- # A map {author : clean_author} for those known (to avoid
- # repeating warnings):
- self._authors = {}
-
- for id in metadata_db.iterkeys():
- metadata = metadata_db[id]
-
- # Record the original author name because it might be needed for
- # expanding CVS keywords:
- metadata.original_author = metadata.author
-
- self._clean_metadata(metadata)
-
- metadata_clean_db[id] = metadata
-
- if self.warnings:
- raise FatalError(
- 'There were warnings converting author names and/or log messages\n'
- 'to Unicode (see messages above). Please restart this pass\n'
- 'with one or more \'--encoding\' parameters or with\n'
- '\'--fallback-encoding\'.'
- )
-
- metadata_clean_db.close()
- metadata_db.close()
- Log().quiet("Done")
-
-
-class CollateSymbolsPass(Pass):
- """Divide symbols into branches, tags, and excludes."""
-
- conversion_names = {
- Trunk : 'trunk',
- Branch : 'branch',
- Tag : 'tag',
- ExcludedSymbol : 'exclude',
- Symbol : '.',
- }
-
- def register_artifacts(self):
- self._register_temp_file(config.SYMBOL_DB)
- self._register_temp_file_needed(config.PROJECTS)
- self._register_temp_file_needed(config.SYMBOL_STATISTICS)
-
- def get_symbol(self, run_options, stats):
- """Use StrategyRules to decide what to do with a symbol.
-
- STATS is an instance of symbol_statistics._Stats describing an
- instance of Symbol or Trunk. To determine how the symbol is to be
- converted, consult the StrategyRules in the project's
- symbol_strategy_rules. Each rule is allowed a chance to change
- the way the symbol will be converted. If the symbol is not a
- Trunk or TypedSymbol after all rules have run, raise
- IndeterminateSymbolException."""
-
- symbol = stats.lod
- rules = run_options.project_symbol_strategy_rules[symbol.project.id]
- for rule in rules:
- symbol = rule.get_symbol(symbol, stats)
- assert symbol is not None
-
- stats.check_valid(symbol)
-
- return symbol
-
- def log_symbol_summary(self, stats, symbol):
- if not self.symbol_info_file:
- return
-
- if isinstance(symbol, Trunk):
- name = '.trunk.'
- preferred_parent_name = '.'
- else:
- name = stats.lod.name
- if symbol.preferred_parent_id is None:
- preferred_parent_name = '.'
- else:
- preferred_parent = self.symbol_stats[symbol.preferred_parent_id].lod
- if isinstance(preferred_parent, Trunk):
- preferred_parent_name = '.trunk.'
- else:
- preferred_parent_name = preferred_parent.name
-
- if isinstance(symbol, LineOfDevelopment) and symbol.base_path:
- symbol_path = symbol.base_path
- else:
- symbol_path = '.'
-
- self.symbol_info_file.write(
- '%-5d %-30s %-10s %s %s\n' % (
- stats.lod.project.id,
- name,
- self.conversion_names[symbol.__class__],
- symbol_path,
- preferred_parent_name,
- )
- )
- self.symbol_info_file.write(' # %s\n' % (stats,))
- parent_counts = stats.possible_parents.items()
- if parent_counts:
- self.symbol_info_file.write(' # Possible parents:\n')
- parent_counts.sort(lambda a,b: cmp((b[1], a[0]), (a[1], b[0])))
- for (pp, count) in parent_counts:
- if isinstance(pp, Trunk):
- self.symbol_info_file.write(
- ' # .trunk. : %d\n' % (count,)
- )
- else:
- self.symbol_info_file.write(
- ' # %s : %d\n' % (pp.name, count,)
- )
-
- def get_symbols(self, run_options):
- """Return a map telling how to convert symbols.
-
- The return value is a map {AbstractSymbol : (Trunk|TypedSymbol)},
- indicating how each symbol should be converted. Trunk objects in
- SYMBOL_STATS are passed through unchanged. One object is included
- in the return value for each line of development described in
- SYMBOL_STATS.
-
- Raise FatalError if there was an error."""
-
- errors = []
- mismatches = []
-
- if Ctx().symbol_info_filename is not None:
- self.symbol_info_file = open(Ctx().symbol_info_filename, 'w')
- self.symbol_info_file.write(
- '# Columns: project_id symbol_name conversion symbol_path '
- 'preferred_parent_name\n'
- )
- else:
- self.symbol_info_file = None
-
- # Initialize each symbol strategy rule a single time, even if it
- # is used in more than one project. First define a map from
- # object id to symbol strategy rule:
- rules = {}
- for rule_list in run_options.project_symbol_strategy_rules:
- for rule in rule_list:
- rules[id(rule)] = rule
-
- for rule in rules.itervalues():
- rule.start(self.symbol_stats)
-
- retval = {}
-
- for stats in self.symbol_stats:
- try:
- symbol = self.get_symbol(run_options, stats)
- except IndeterminateSymbolException, e:
- self.log_symbol_summary(stats, stats.lod)
- mismatches.append(e.stats)
- except SymbolPlanError, e:
- self.log_symbol_summary(stats, stats.lod)
- errors.append(e)
- else:
- self.log_symbol_summary(stats, symbol)
- retval[stats.lod] = symbol
-
- for rule in rules.itervalues():
- rule.finish()
-
- if self.symbol_info_file:
- self.symbol_info_file.close()
-
- del self.symbol_info_file
-
- if errors or mismatches:
- s = ['Problems determining how symbols should be converted:\n']
- for e in errors:
- s.append('%s\n' % (e,))
- if mismatches:
- s.append(
- 'It is not clear how the following symbols '
- 'should be converted.\n'
- 'Use --symbol-hints, --force-tag, --force-branch, --exclude, '
- 'and/or\n'
- '--symbol-default to resolve the ambiguity.\n'
- )
- for stats in mismatches:
- s.append(' %s\n' % (stats,))
- raise FatalError(''.join(s))
- else:
- return retval
-
- def run(self, run_options, stats_keeper):
- Ctx()._projects = read_projects(
- artifact_manager.get_temp_file(config.PROJECTS)
- )
- self.symbol_stats = SymbolStatistics(
- artifact_manager.get_temp_file(config.SYMBOL_STATISTICS)
- )
-
- symbol_map = self.get_symbols(run_options)
-
- # Check the symbols for consistency and bail out if there were errors:
- self.symbol_stats.check_consistency(symbol_map)
-
- # Check that the symbols all have SVN paths set and that the paths
- # are disjoint:
- Ctx().output_option.check_symbols(symbol_map)
-
- for symbol in symbol_map.itervalues():
- if isinstance(symbol, ExcludedSymbol):
- self.symbol_stats.exclude_symbol(symbol)
-
- create_symbol_database(symbol_map.values())
-
- del self.symbol_stats
-
- Log().quiet("Done")
-
-
-class FilterSymbolsPass(Pass):
- """Delete any branches/tags that are to be excluded.
-
- Also delete revisions on excluded branches, and delete other
- references to the excluded symbols."""
-
- def register_artifacts(self):
- self._register_temp_file(config.SUMMARY_SERIALIZER)
- self._register_temp_file(config.CVS_REVS_SUMMARY_DATAFILE)
- self._register_temp_file(config.CVS_SYMBOLS_SUMMARY_DATAFILE)
- self._register_temp_file_needed(config.PROJECTS)
- self._register_temp_file_needed(config.SYMBOL_DB)
- self._register_temp_file_needed(config.CVS_FILES_DB)
- self._register_temp_file_needed(config.CVS_ITEMS_STORE)
- Ctx().revision_excluder.register_artifacts(self)
-
- def run(self, run_options, stats_keeper):
- Ctx()._projects = read_projects(
- artifact_manager.get_temp_file(config.PROJECTS)
- )
- Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
- Ctx()._symbol_db = SymbolDatabase()
- cvs_item_store = OldCVSItemStore(
- artifact_manager.get_temp_file(config.CVS_ITEMS_STORE))
-
- cvs_item_serializer = PrimedPickleSerializer(cvs_item_primer)
- f = open(artifact_manager.get_temp_file(config.SUMMARY_SERIALIZER), 'wb')
- cPickle.dump(cvs_item_serializer, f, -1)
- f.close()
-
- rev_db = NewSortableCVSRevisionDatabase(
- artifact_manager.get_temp_file(config.CVS_REVS_SUMMARY_DATAFILE),
- cvs_item_serializer,
- )
-
- symbol_db = NewSortableCVSSymbolDatabase(
- artifact_manager.get_temp_file(config.CVS_SYMBOLS_SUMMARY_DATAFILE),
- cvs_item_serializer,
- )
-
- revision_excluder = Ctx().revision_excluder
-
- Log().quiet("Filtering out excluded symbols and summarizing items...")
-
- stats_keeper.reset_cvs_rev_info()
- revision_excluder.start()
-
- # Process the cvs items store one file at a time:
- for cvs_file_items in cvs_item_store.iter_cvs_file_items():
- Log().verbose(cvs_file_items.cvs_file.filename)
- cvs_file_items.filter_excluded_symbols(revision_excluder)
- cvs_file_items.mutate_symbols()
- cvs_file_items.adjust_parents()
- cvs_file_items.refine_symbols()
- cvs_file_items.record_opened_symbols()
- cvs_file_items.record_closed_symbols()
- cvs_file_items.check_link_consistency()
-
- # Store whatever is left to the new file and update statistics:
- stats_keeper.record_cvs_file(cvs_file_items.cvs_file)
- for cvs_item in cvs_file_items.values():
- stats_keeper.record_cvs_item(cvs_item)
-
- if isinstance(cvs_item, CVSRevision):
- rev_db.add(cvs_item)
- elif isinstance(cvs_item, CVSSymbol):
- symbol_db.add(cvs_item)
-
- stats_keeper.set_stats_reflect_exclude(True)
-
- rev_db.close()
- symbol_db.close()
- revision_excluder.finish()
- cvs_item_store.close()
- Ctx()._symbol_db.close()
- Ctx()._cvs_file_db.close()
-
- Log().quiet("Done")
-
-
-class SortRevisionSummaryPass(Pass):
- """Sort the revision summary file."""
-
- def register_artifacts(self):
- self._register_temp_file(config.CVS_REVS_SUMMARY_SORTED_DATAFILE)
- self._register_temp_file_needed(config.CVS_REVS_SUMMARY_DATAFILE)
-
- def run(self, run_options, stats_keeper):
- Log().quiet("Sorting CVS revision summaries...")
- sort_file(
- artifact_manager.get_temp_file(config.CVS_REVS_SUMMARY_DATAFILE),
- artifact_manager.get_temp_file(
- config.CVS_REVS_SUMMARY_SORTED_DATAFILE))
- Log().quiet("Done")
-
-
-class SortSymbolSummaryPass(Pass):
- """Sort the symbol summary file."""
-
- def register_artifacts(self):
- self._register_temp_file(config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE)
- self._register_temp_file_needed(config.CVS_SYMBOLS_SUMMARY_DATAFILE)
-
- def run(self, run_options, stats_keeper):
- Log().quiet("Sorting CVS symbol summaries...")
- sort_file(
- artifact_manager.get_temp_file(config.CVS_SYMBOLS_SUMMARY_DATAFILE),
- artifact_manager.get_temp_file(
- config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE))
- Log().quiet("Done")
-
-
-class InitializeChangesetsPass(Pass):
- """Create preliminary CommitSets."""
-
- def register_artifacts(self):
- self._register_temp_file(config.CVS_ITEM_TO_CHANGESET)
- self._register_temp_file(config.CHANGESETS_STORE)
- self._register_temp_file(config.CHANGESETS_INDEX)
- self._register_temp_file(config.CVS_ITEMS_SORTED_STORE)
- self._register_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE)
- self._register_temp_file_needed(config.PROJECTS)
- self._register_temp_file_needed(config.SYMBOL_DB)
- self._register_temp_file_needed(config.CVS_FILES_DB)
- self._register_temp_file_needed(config.SUMMARY_SERIALIZER)
- self._register_temp_file_needed(config.CVS_REVS_SUMMARY_SORTED_DATAFILE)
- self._register_temp_file_needed(
- config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE)
-
- def get_revision_changesets(self):
- """Generate revision changesets, one at a time.
-
- Each time, yield a list of CVSRevisions that might potentially
- consititute a changeset."""
-
- # Create changesets for CVSRevisions:
- old_metadata_id = None
- old_timestamp = None
- changeset_items = []
-
- db = OldSortableCVSRevisionDatabase(
- artifact_manager.get_temp_file(
- config.CVS_REVS_SUMMARY_SORTED_DATAFILE
- ),
- self.cvs_item_serializer,
- )
-
- for cvs_rev in db:
- if cvs_rev.metadata_id != old_metadata_id \
- or cvs_rev.timestamp > old_timestamp + config.COMMIT_THRESHOLD:
- # Start a new changeset. First finish up the old changeset,
- # if any:
- if changeset_items:
- yield changeset_items
- changeset_items = []
- old_metadata_id = cvs_rev.metadata_id
- changeset_items.append(cvs_rev)
- old_timestamp = cvs_rev.timestamp
-
- # Finish up the last changeset, if any:
- if changeset_items:
- yield changeset_items
-
- def get_symbol_changesets(self):
- """Generate symbol changesets, one at a time.
-
- Each time, yield a list of CVSSymbols that might potentially
- consititute a changeset."""
-
- old_symbol_id = None
- changeset_items = []
-
- db = OldSortableCVSSymbolDatabase(
- artifact_manager.get_temp_file(
- config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE
- ),
- self.cvs_item_serializer,
- )
-
- for cvs_symbol in db:
- if cvs_symbol.symbol.id != old_symbol_id:
- # Start a new changeset. First finish up the old changeset,
- # if any:
- if changeset_items:
- yield changeset_items
- changeset_items = []
- old_symbol_id = cvs_symbol.symbol.id
- changeset_items.append(cvs_symbol)
-
- # Finish up the last changeset, if any:
- if changeset_items:
- yield changeset_items
-
- @staticmethod
- def compare_items(a, b):
- return (
- cmp(a.timestamp, b.timestamp)
- or cmp(a.cvs_file.cvs_path, b.cvs_file.cvs_path)
- or cmp([int(x) for x in a.rev.split('.')],
- [int(x) for x in b.rev.split('.')])
- or cmp(a.id, b.id))
-
- def break_internal_dependencies(self, changeset_items):
- """Split up CHANGESET_ITEMS if necessary to break internal dependencies.
-
- CHANGESET_ITEMS is a list of CVSRevisions that could possibly
- belong in a single RevisionChangeset, but there might be internal
- dependencies among the items. Return a list of lists, where each
- sublist is a list of CVSRevisions and at least one internal
- dependency has been eliminated. Iff CHANGESET_ITEMS does not have
- to be split, then the return value will contain a single value,
- namely the original value of CHANGESET_ITEMS. Split
- CHANGESET_ITEMS at most once, even though the resulting changesets
- might themselves have internal dependencies."""
-
- # We only look for succ dependencies, since by doing so we
- # automatically cover pred dependencies as well. First create a
- # list of tuples (pred, succ) of id pairs for CVSItems that depend
- # on each other.
- dependencies = []
- changeset_cvs_item_ids = set([cvs_rev.id for cvs_rev in changeset_items])
- for cvs_item in changeset_items:
- for next_id in cvs_item.get_succ_ids():
- if next_id in changeset_cvs_item_ids:
- # Sanity check: a CVSItem should never depend on itself:
- if next_id == cvs_item.id:
- raise InternalError('Item depends on itself: %s' % (cvs_item,))
-
- dependencies.append((cvs_item.id, next_id,))
-
- if dependencies:
- # Sort the changeset_items in a defined order (chronological to the
- # extent that the timestamps are correct and unique).
- changeset_items.sort(self.compare_items)
- indexes = {}
- for (i, changeset_item) in enumerate(changeset_items):
- indexes[changeset_item.id] = i
- # How many internal dependencies would be broken by breaking the
- # Changeset after a particular index?
- breaks = [0] * len(changeset_items)
- for (pred, succ,) in dependencies:
- pred_index = indexes[pred]
- succ_index = indexes[succ]
- breaks[min(pred_index, succ_index)] += 1
- breaks[max(pred_index, succ_index)] -= 1
- best_i = None
- best_count = -1
- best_time = 0
- for i in range(1, len(breaks)):
- breaks[i] += breaks[i - 1]
- for i in range(0, len(breaks) - 1):
- if breaks[i] > best_count:
- best_i = i
- best_count = breaks[i]
- best_time = (changeset_items[i + 1].timestamp
- - changeset_items[i].timestamp)
- elif breaks[i] == best_count \
- and (changeset_items[i + 1].timestamp
- - changeset_items[i].timestamp) < best_time:
- best_i = i
- best_count = breaks[i]
- best_time = (changeset_items[i + 1].timestamp
- - changeset_items[i].timestamp)
- # Reuse the old changeset.id for the first of the split changesets.
- return [changeset_items[:best_i + 1], changeset_items[best_i + 1:]]
- else:
- return [changeset_items]
-
- def break_all_internal_dependencies(self, changeset_items):
- """Keep breaking CHANGESET_ITEMS up to break all internal dependencies.
-
- CHANGESET_ITEMS is a list of CVSRevisions that could conceivably
- be part of a single changeset. Break this list into sublists,
- where the CVSRevisions in each sublist are free of mutual
- dependencies."""
-
- # This method is written non-recursively to avoid any possible
- # problems with recursion depth.
-
- changesets_to_split = [changeset_items]
- while changesets_to_split:
- changesets = self.break_internal_dependencies(changesets_to_split.pop())
- if len(changesets) == 1:
- [changeset_items] = changesets
- yield changeset_items
- else:
- # The changeset had to be split; see if either of the
- # fragments have to be split:
- changesets.reverse()
- changesets_to_split.extend(changesets)
-
- def get_changesets(self):
- """Generate (Changeset, [CVSItem,...]) for all changesets.
-
- The Changesets already have their internal dependencies broken.
- The [CVSItem,...] list is the list of CVSItems in the
- corresponding Changeset."""
-
- for changeset_items in self.get_revision_changesets():
- for split_changeset_items \
- in self.break_all_internal_dependencies(changeset_items):
- yield (
- RevisionChangeset(
- self.changeset_key_generator.gen_id(),
- [cvs_rev.id for cvs_rev in split_changeset_items]
- ),
- split_changeset_items,
- )
-
- for changeset_items in self.get_symbol_changesets():
- yield (
- create_symbol_changeset(
- self.changeset_key_generator.gen_id(),
- changeset_items[0].symbol,
- [cvs_symbol.id for cvs_symbol in changeset_items]
- ),
- changeset_items,
- )
-
- def run(self, run_options, stats_keeper):
- Log().quiet("Creating preliminary commit sets...")
-
- Ctx()._projects = read_projects(
- artifact_manager.get_temp_file(config.PROJECTS)
- )
- Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
- Ctx()._symbol_db = SymbolDatabase()
-
- f = open(artifact_manager.get_temp_file(config.SUMMARY_SERIALIZER), 'rb')
- self.cvs_item_serializer = cPickle.load(f)
- f.close()
-
- changeset_db = ChangesetDatabase(
- artifact_manager.get_temp_file(config.CHANGESETS_STORE),
- artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
- DB_OPEN_NEW,
- )
- cvs_item_to_changeset_id = CVSItemToChangesetTable(
- artifact_manager.get_temp_file(config.CVS_ITEM_TO_CHANGESET),
- DB_OPEN_NEW,
- )
-
- self.sorted_cvs_items_db = IndexedCVSItemStore(
- artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
- artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
- DB_OPEN_NEW)
-
- self.changeset_key_generator = KeyGenerator()
-
- for (changeset, changeset_items) in self.get_changesets():
- if Log().is_on(Log.DEBUG):
- Log().debug(repr(changeset))
- changeset_db.store(changeset)
- for cvs_item in changeset_items:
- self.sorted_cvs_items_db.add(cvs_item)
- cvs_item_to_changeset_id[cvs_item.id] = changeset.id
-
- self.sorted_cvs_items_db.close()
- cvs_item_to_changeset_id.close()
- changeset_db.close()
- Ctx()._symbol_db.close()
- Ctx()._cvs_file_db.close()
-
- del self.cvs_item_serializer
-
- Log().quiet("Done")
-
-
-class ProcessedChangesetLogger:
- def __init__(self):
- self.processed_changeset_ids = []
-
- def log(self, changeset_id):
- if Log().is_on(Log.DEBUG):
- self.processed_changeset_ids.append(changeset_id)
-
- def flush(self):
- if self.processed_changeset_ids:
- Log().debug(
- 'Consumed changeset ids %s'
- % (', '.join(['%x' % id for id in self.processed_changeset_ids]),))
-
- del self.processed_changeset_ids[:]
-
-
-class BreakRevisionChangesetCyclesPass(Pass):
- """Break up any dependency cycles involving only RevisionChangesets."""
-
- def register_artifacts(self):
- self._register_temp_file(config.CHANGESETS_REVBROKEN_STORE)
- self._register_temp_file(config.CHANGESETS_REVBROKEN_INDEX)
- self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)
- self._register_temp_file_needed(config.PROJECTS)
- self._register_temp_file_needed(config.SYMBOL_DB)
- self._register_temp_file_needed(config.CVS_FILES_DB)
- self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
- self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
- self._register_temp_file_needed(config.CHANGESETS_STORE)
- self._register_temp_file_needed(config.CHANGESETS_INDEX)
- self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET)
-
- def get_source_changesets(self):
- old_changeset_db = ChangesetDatabase(
- artifact_manager.get_temp_file(config.CHANGESETS_STORE),
- artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
- DB_OPEN_READ)
-
- changeset_ids = old_changeset_db.keys()
-
- for changeset_id in changeset_ids:
- yield old_changeset_db[changeset_id]
-
- old_changeset_db.close()
- del old_changeset_db
-
- def break_cycle(self, cycle):
- """Break up one or more changesets in CYCLE to help break the cycle.
-
- CYCLE is a list of Changesets where
-
- cycle[i] depends on cycle[i - 1]
-
- Break up one or more changesets in CYCLE to make progress towards
- breaking the cycle. Update self.changeset_graph accordingly.
-
- It is not guaranteed that the cycle will be broken by one call to
- this routine, but at least some progress must be made."""
-
- self.processed_changeset_logger.flush()
- best_i = None
- best_link = None
- for i in range(len(cycle)):
- # It's OK if this index wraps to -1:
- link = ChangesetGraphLink(
- cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])
-
- if best_i is None or link < best_link:
- best_i = i
- best_link = link
-
- if Log().is_on(Log.DEBUG):
- Log().debug(
- 'Breaking cycle %s by breaking node %x' % (
- ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
- best_link.changeset.id,))
-
- new_changesets = best_link.break_changeset(self.changeset_key_generator)
-
- self.changeset_graph.delete_changeset(best_link.changeset)
-
- for changeset in new_changesets:
- self.changeset_graph.add_new_changeset(changeset)
-
- def run(self, run_options, stats_keeper):
- Log().quiet("Breaking revision changeset dependency cycles...")
-
- Ctx()._projects = read_projects(
- artifact_manager.get_temp_file(config.PROJECTS)
- )
- Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
- Ctx()._symbol_db = SymbolDatabase()
- Ctx()._cvs_items_db = IndexedCVSItemStore(
- artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
- artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
- DB_OPEN_READ)
-
- shutil.copyfile(
- artifact_manager.get_temp_file(
- config.CVS_ITEM_TO_CHANGESET),
- artifact_manager.get_temp_file(
- config.CVS_ITEM_TO_CHANGESET_REVBROKEN))
- cvs_item_to_changeset_id = CVSItemToChangesetTable(
- artifact_manager.get_temp_file(
- config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
- DB_OPEN_WRITE)
-
- changeset_db = ChangesetDatabase(
- artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
- artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
- DB_OPEN_NEW)
-
- self.changeset_graph = ChangesetGraph(
- changeset_db, cvs_item_to_changeset_id
- )
-
- max_changeset_id = 0
- for changeset in self.get_source_changesets():
- changeset_db.store(changeset)
- if isinstance(changeset, RevisionChangeset):
- self.changeset_graph.add_changeset(changeset)
- max_changeset_id = max(max_changeset_id, changeset.id)
-
- self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)
-
- self.processed_changeset_logger = ProcessedChangesetLogger()
-
- # Consume the graph, breaking cycles using self.break_cycle():
- for (changeset, time_range) in self.changeset_graph.consume_graph(
- cycle_breaker=self.break_cycle
- ):
- self.processed_changeset_logger.log(changeset.id)
-
- self.processed_changeset_logger.flush()
- del self.processed_changeset_logger
-
- self.changeset_graph.close()
- self.changeset_graph = None
- Ctx()._cvs_items_db.close()
- Ctx()._symbol_db.close()
- Ctx()._cvs_file_db.close()
-
- Log().quiet("Done")
-
-
-class RevisionTopologicalSortPass(Pass):
- """Sort RevisionChangesets into commit order.
-
- Also convert them to OrderedChangesets, without changing their ids."""
-
- def register_artifacts(self):
- self._register_temp_file(config.CHANGESETS_REVSORTED_STORE)
- self._register_temp_file(config.CHANGESETS_REVSORTED_INDEX)
- self._register_temp_file_needed(config.PROJECTS)
- self._register_temp_file_needed(config.SYMBOL_DB)
- self._register_temp_file_needed(config.CVS_FILES_DB)
- self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
- self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
- self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_STORE)
- self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_INDEX)
- self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)
-
- def get_source_changesets(self, changeset_db):
- changeset_ids = changeset_db.keys()
-
- for changeset_id in changeset_ids:
- yield changeset_db[changeset_id]
-
- def get_changesets(self):
- changeset_db = ChangesetDatabase(
- artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
- artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
- DB_OPEN_READ,
- )
-
- changeset_graph = ChangesetGraph(
- changeset_db,
- CVSItemToChangesetTable(
- artifact_manager.get_temp_file(
- config.CVS_ITEM_TO_CHANGESET_REVBROKEN
- ),
- DB_OPEN_READ,
- )
- )
-
- for changeset in self.get_source_changesets(changeset_db):
- if isinstance(changeset, RevisionChangeset):
- changeset_graph.add_changeset(changeset)
- else:
- yield changeset
-
- changeset_ids = []
-
- # Sentry:
- changeset_ids.append(None)
-
- for (changeset, time_range) in changeset_graph.consume_graph():
- changeset_ids.append(changeset.id)
-
- # Sentry:
- changeset_ids.append(None)
-
- for i in range(1, len(changeset_ids) - 1):
- changeset = changeset_db[changeset_ids[i]]
- yield OrderedChangeset(
- changeset.id, changeset.cvs_item_ids, i - 1,
- changeset_ids[i - 1], changeset_ids[i + 1])
-
- changeset_graph.close()
-
- def run(self, run_options, stats_keeper):
- Log().quiet("Generating CVSRevisions in commit order...")
-
- Ctx()._projects = read_projects(
- artifact_manager.get_temp_file(config.PROJECTS)
- )
- Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
- Ctx()._symbol_db = SymbolDatabase()
- Ctx()._cvs_items_db = IndexedCVSItemStore(
- artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
- artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
- DB_OPEN_READ)
-
- changesets_revordered_db = ChangesetDatabase(
- artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
- artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
- DB_OPEN_NEW)
-
- for changeset in self.get_changesets():
- changesets_revordered_db.store(changeset)
-
- changesets_revordered_db.close()
- Ctx()._cvs_items_db.close()
- Ctx()._symbol_db.close()
- Ctx()._cvs_file_db.close()
-
- Log().quiet("Done")
-
-
-class BreakSymbolChangesetCyclesPass(Pass):
- """Break up any dependency cycles involving only SymbolChangesets."""
-
- def register_artifacts(self):
- self._register_temp_file(config.CHANGESETS_SYMBROKEN_STORE)
- self._register_temp_file(config.CHANGESETS_SYMBROKEN_INDEX)
- self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)
- self._register_temp_file_needed(config.PROJECTS)
- self._register_temp_file_needed(config.SYMBOL_DB)
- self._register_temp_file_needed(config.CVS_FILES_DB)
- self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
- self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
- self._register_temp_file_needed(config.CHANGESETS_REVSORTED_STORE)
- self._register_temp_file_needed(config.CHANGESETS_REVSORTED_INDEX)
- self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)
-
- def get_source_changesets(self):
- old_changeset_db = ChangesetDatabase(
- artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
- artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
- DB_OPEN_READ)
-
- changeset_ids = old_changeset_db.keys()
-
- for changeset_id in changeset_ids:
- yield old_changeset_db[changeset_id]
-
- old_changeset_db.close()
-
- def break_cycle(self, cycle):
- """Break up one or more changesets in CYCLE to help break the cycle.
-
- CYCLE is a list of Changesets where
-
- cycle[i] depends on cycle[i - 1]
-
- Break up one or more changesets in CYCLE to make progress towards
- breaking the cycle. Update self.changeset_graph accordingly.
-
- It is not guaranteed that the cycle will be broken by one call to
- this routine, but at least some progress must be made."""
-
- self.processed_changeset_logger.flush()
- best_i = None
- best_link = None
- for i in range(len(cycle)):
- # It's OK if this index wraps to -1:
- link = ChangesetGraphLink(
- cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])
-
- if best_i is None or link < best_link:
- best_i = i
- best_link = link
-
- if Log().is_on(Log.DEBUG):
- Log().debug(
- 'Breaking cycle %s by breaking node %x' % (
- ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
- best_link.changeset.id,))
-
- new_changesets = best_link.break_changeset(self.changeset_key_generator)
-
- self.changeset_graph.delete_changeset(best_link.changeset)
-
- for changeset in new_changesets:
- self.changeset_graph.add_new_changeset(changeset)
-
- def run(self, run_options, stats_keeper):
- Log().quiet("Breaking symbol changeset dependency cycles...")
-
- Ctx()._projects = read_projects(
- artifact_manager.get_temp_file(config.PROJECTS)
- )
- Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
- Ctx()._symbol_db = SymbolDatabase()
- Ctx()._cvs_items_db = IndexedCVSItemStore(
- artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
- artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
- DB_OPEN_READ)
-
- shutil.copyfile(
- artifact_manager.get_temp_file(
- config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
- artifact_manager.get_temp_file(
- config.CVS_ITEM_TO_CHANGESET_SYMBROKEN))
- cvs_item_to_changeset_id = CVSItemToChangesetTable(
- artifact_manager.get_temp_file(
- config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
- DB_OPEN_WRITE)
-
- changeset_db = ChangesetDatabase(
- artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
- artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
- DB_OPEN_NEW)
-
- self.changeset_graph = ChangesetGraph(
- changeset_db, cvs_item_to_changeset_id
- )
-
- max_changeset_id = 0
- for changeset in self.get_source_changesets():
- changeset_db.store(changeset)
- if isinstance(changeset, SymbolChangeset):
- self.changeset_graph.add_changeset(changeset)
- max_changeset_id = max(max_changeset_id, changeset.id)
-
- self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)
-
- self.processed_changeset_logger = ProcessedChangesetLogger()
-
- # Consume the graph, breaking cycles using self.break_cycle():
- for (changeset, time_range) in self.changeset_graph.consume_graph(
- cycle_breaker=self.break_cycle
- ):
- self.processed_changeset_logger.log(changeset.id)
-
- self.processed_changeset_logger.flush()
- del self.processed_changeset_logger
-
- self.changeset_graph.close()
- self.changeset_graph = None
- Ctx()._cvs_items_db.close()
- Ctx()._symbol_db.close()
- Ctx()._cvs_file_db.close()
-
- Log().quiet("Done")
-
-
-class BreakAllChangesetCyclesPass(Pass):
- """Break up any dependency cycles that are closed by SymbolChangesets."""
-
- def register_artifacts(self):
- self._register_temp_file(config.CHANGESETS_ALLBROKEN_STORE)
- self._register_temp_file(config.CHANGESETS_ALLBROKEN_INDEX)
- self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
- self._register_temp_file_needed(config.PROJECTS)
- self._register_temp_file_needed(config.SYMBOL_DB)
- self._register_temp_file_needed(config.CVS_FILES_DB)
- self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
- self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
- self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_STORE)
- self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_INDEX)
- self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)
-
- def get_source_changesets(self):
- old_changeset_db = ChangesetDatabase(
- artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
- artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
- DB_OPEN_READ)
-
- changeset_ids = old_changeset_db.keys()
-
- for changeset_id in changeset_ids:
- yield old_changeset_db[changeset_id]
-
- old_changeset_db.close()
-
- def _split_retrograde_changeset(self, changeset):
- """CHANGESET is retrograde. Split it into non-retrograde changesets."""
-
- Log().debug('Breaking retrograde changeset %x' % (changeset.id,))
-
- self.changeset_graph.delete_changeset(changeset)
-
- # A map { cvs_branch_id : (max_pred_ordinal, min_succ_ordinal) }
- ordinal_limits = {}
- for cvs_branch in changeset.iter_cvs_items():
- max_pred_ordinal = 0
- min_succ_ordinal = sys.maxint
-
- for pred_id in cvs_branch.get_pred_ids():
- pred_ordinal = self.ordinals.get(
- self.cvs_item_to_changeset_id[pred_id], 0)
- max_pred_ordinal = max(max_pred_ordinal, pred_ordinal)
-
- for succ_id in cvs_branch.get_succ_ids():
- succ_ordinal = self.ordinals.get(
- self.cvs_item_to_changeset_id[succ_id], sys.maxint)
- min_succ_ordinal = min(min_succ_ordinal, succ_ordinal)
-
- assert max_pred_ordinal < min_succ_ordinal
- ordinal_limits[cvs_branch.id] = (max_pred_ordinal, min_succ_ordinal,)
-
- # Find the earliest successor ordinal:
- min_min_succ_ordinal = sys.maxint
- for (max_pred_ordinal, min_succ_ordinal) in ordinal_limits.values():
- min_min_succ_ordinal = min(min_min_succ_ordinal, min_succ_ordinal)
-
- early_item_ids = []
- late_item_ids = []
- for (id, (max_pred_ordinal, min_succ_ordinal)) in ordinal_limits.items():
- if max_pred_ordinal >= min_min_succ_ordinal:
- late_item_ids.append(id)
- else:
- early_item_ids.append(id)
-
- assert early_item_ids
- assert late_item_ids
-
- early_changeset = changeset.create_split_changeset(
- self.changeset_key_generator.gen_id(), early_item_ids)
- late_changeset = changeset.create_split_changeset(
- self.changeset_key_generator.gen_id(), late_item_ids)
-
- self.changeset_graph.add_new_changeset(early_changeset)
- self.changeset_graph.add_new_changeset(late_changeset)
-
- early_split = self._split_if_retrograde(early_changeset.id)
-
- # Because of the way we constructed it, the early changeset should
- # not have to be split:
- assert not early_split
-
- self._split_if_retrograde(late_changeset.id)
-
- def _split_if_retrograde(self, changeset_id):
- node = self.changeset_graph[changeset_id]
- pred_ordinals = [
- self.ordinals[id]
- for id in node.pred_ids
- if id in self.ordinals
- ]
- pred_ordinals.sort()
- succ_ordinals = [
- self.ordinals[id]
- for id in node.succ_ids
- if id in self.ordinals
- ]
- succ_ordinals.sort()
- if pred_ordinals and succ_ordinals \
- and pred_ordinals[-1] >= succ_ordinals[0]:
- self._split_retrograde_changeset(self.changeset_db[node.id])
- return True
- else:
- return False
-
- def break_segment(self, segment):
- """Break a changeset in SEGMENT[1:-1].
-
- The range SEGMENT[1:-1] is not empty, and all of the changesets in
- that range are SymbolChangesets."""
-
- best_i = None
- best_link = None
- for i in range(1, len(segment) - 1):
- link = ChangesetGraphLink(segment[i - 1], segment[i], segment[i + 1])
-
- if best_i is None or link < best_link:
- best_i = i
- best_link = link
-
- if Log().is_on(Log.DEBUG):
- Log().debug(
- 'Breaking segment %s by breaking node %x' % (
- ' -> '.join(['%x' % node.id for node in segment]),
- best_link.changeset.id,))
-
- new_changesets = best_link.break_changeset(self.changeset_key_generator)
-
- self.changeset_graph.delete_changeset(best_link.changeset)
-
- for changeset in new_changesets:
- self.changeset_graph.add_new_changeset(changeset)
-
- def break_cycle(self, cycle):
- """Break up one or more SymbolChangesets in CYCLE to help break the cycle.
-
- CYCLE is a list of SymbolChangesets where
-
- cycle[i] depends on cycle[i - 1]
-
- . Break up one or more changesets in CYCLE to make progress
- towards breaking the cycle. Update self.changeset_graph
- accordingly.
-
- It is not guaranteed that the cycle will be broken by one call to
- this routine, but at least some progress must be made."""
-
- if Log().is_on(Log.DEBUG):
- Log().debug(
- 'Breaking cycle %s' % (
- ' -> '.join(['%x' % changeset.id
- for changeset in cycle + [cycle[0]]]),))
-
- # Unwrap the cycle into a segment then break the segment:
- self.break_segment([cycle[-1]] + cycle + [cycle[0]])
-
- def run(self, run_options, stats_keeper):
- Log().quiet("Breaking CVSSymbol dependency loops...")
-
- Ctx()._projects = read_projects(
- artifact_manager.get_temp_file(config.PROJECTS)
- )
- Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
- Ctx()._symbol_db = SymbolDatabase()
- Ctx()._cvs_items_db = IndexedCVSItemStore(
- artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
- artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
- DB_OPEN_READ)
-
- shutil.copyfile(
- artifact_manager.get_temp_file(
- config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
- artifact_manager.get_temp_file(
- config.CVS_ITEM_TO_CHANGESET_ALLBROKEN))
- self.cvs_item_to_changeset_id = CVSItemToChangesetTable(
- artifact_manager.get_temp_file(
- config.CVS_ITEM_TO_CHANGESET_ALLBROKEN),
- DB_OPEN_WRITE)
-
- self.changeset_db = ChangesetDatabase(
- artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
- artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
- DB_OPEN_NEW)
-
- self.changeset_graph = ChangesetGraph(
- self.changeset_db, self.cvs_item_to_changeset_id
- )
-
- # A map {changeset_id : ordinal} for OrderedChangesets:
- self.ordinals = {}
- # A map {ordinal : changeset_id}:
- ordered_changeset_map = {}
- # A list of all BranchChangeset ids:
- branch_changeset_ids = []
- max_changeset_id = 0
- for changeset in self.get_source_changesets():
- self.changeset_db.store(changeset)
- self.changeset_graph.add_changeset(changeset)
- if isinstance(changeset, OrderedChangeset):
- ordered_changeset_map[changeset.ordinal] = changeset.id
- self.ordinals[changeset.id] = changeset.ordinal
- elif isinstance(changeset, BranchChangeset):
- branch_changeset_ids.append(changeset.id)
- max_changeset_id = max(max_changeset_id, changeset.id)
-
- # An array of ordered_changeset ids, indexed by ordinal:
- ordered_changesets = []
- for ordinal in range(len(ordered_changeset_map)):
- id = ordered_changeset_map[ordinal]
- ordered_changesets.append(id)
-
- ordered_changeset_ids = set(ordered_changeset_map.values())
- del ordered_changeset_map
-
- self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)
-
- # First we scan through all BranchChangesets looking for
- # changesets that are individually "retrograde" and splitting
- # those up:
- for changeset_id in branch_changeset_ids:
- self._split_if_retrograde(changeset_id)
-
- del self.ordinals
-
- next_ordered_changeset = 0
-
- self.processed_changeset_logger = ProcessedChangesetLogger()
-
- while self.changeset_graph:
- # Consume any nodes that don't have predecessors:
- for (changeset, time_range) \
- in self.changeset_graph.consume_nopred_nodes():
- self.processed_changeset_logger.log(changeset.id)
- if changeset.id in ordered_changeset_ids:
- next_ordered_changeset += 1
- ordered_changeset_ids.remove(changeset.id)
-
- self.processed_changeset_logger.flush()
-
- if not self.changeset_graph:
- break
-
- # Now work on the next ordered changeset that has not yet been
- # processed. BreakSymbolChangesetCyclesPass has broken any
- # cycles involving only SymbolChangesets, so the presence of a
- # cycle implies that there is at least one ordered changeset
- # left in the graph:
- assert next_ordered_changeset < len(ordered_changesets)
-
- id = ordered_changesets[next_ordered_changeset]
- path = self.changeset_graph.search_for_path(id, ordered_changeset_ids)
- if path:
- if Log().is_on(Log.DEBUG):
- Log().debug('Breaking path from %s to %s' % (path[0], path[-1],))
- self.break_segment(path)
- else:
- # There were no ordered changesets among the reachable
- # predecessors, so do generic cycle-breaking:
- if Log().is_on(Log.DEBUG):
- Log().debug(
- 'Breaking generic cycle found from %s'
- % (self.changeset_db[id],)
- )
- self.break_cycle(self.changeset_graph.find_cycle(id))
-
- del self.processed_changeset_logger
- self.changeset_graph.close()
- self.changeset_graph = None
- self.cvs_item_to_changeset_id = None
- self.changeset_db = None
-
- Log().quiet("Done")
-
-
-class TopologicalSortPass(Pass):
- """Sort changesets into commit order."""
-
- def register_artifacts(self):
- self._register_temp_file(config.CHANGESETS_SORTED_DATAFILE)
- self._register_temp_file_needed(config.PROJECTS)
- self._register_temp_file_needed(config.SYMBOL_DB)
- self._register_temp_file_needed(config.CVS_FILES_DB)
- self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
- self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
- self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
- self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
- self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
-
- def get_source_changesets(self, changeset_db):
- for changeset_id in changeset_db.keys():
- yield changeset_db[changeset_id]
-
- def get_changesets(self):
- """Generate (changeset, timestamp) pairs in commit order."""
-
- changeset_db = ChangesetDatabase(
- artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
- artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
- DB_OPEN_READ)
-
- changeset_graph = ChangesetGraph(
- changeset_db,
- CVSItemToChangesetTable(
- artifact_manager.get_temp_file(
- config.CVS_ITEM_TO_CHANGESET_ALLBROKEN
- ),
- DB_OPEN_READ,
- ),
- )
- symbol_changeset_ids = set()
-
- for changeset in self.get_source_changesets(changeset_db):
- changeset_graph.add_changeset(changeset)
- if isinstance(changeset, SymbolChangeset):
- symbol_changeset_ids.add(changeset.id)
-
- # Ensure a monotonically-increasing timestamp series by keeping
- # track of the previous timestamp and ensuring that the following
- # one is larger.
- timestamper = Timestamper()
-
- for (changeset, time_range) in changeset_graph.consume_graph():
- timestamp = timestamper.get(
- time_range.t_max, changeset.id in symbol_changeset_ids
- )
- yield (changeset, timestamp)
-
- changeset_graph.close()
-
- def run(self, run_options, stats_keeper):
- Log().quiet("Generating CVSRevisions in commit order...")
-
- Ctx()._projects = read_projects(
- artifact_manager.get_temp_file(config.PROJECTS)
- )
- Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
- Ctx()._symbol_db = SymbolDatabase()
- Ctx()._cvs_items_db = IndexedCVSItemStore(
- artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
- artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
- DB_OPEN_READ)
-
- sorted_changesets = open(
- artifact_manager.get_temp_file(config.CHANGESETS_SORTED_DATAFILE),
- 'w')
-
- for (changeset, timestamp) in self.get_changesets():
- sorted_changesets.write('%x %08x\n' % (changeset.id, timestamp,))
-
- sorted_changesets.close()
-
- Ctx()._cvs_items_db.close()
- Ctx()._symbol_db.close()
- Ctx()._cvs_file_db.close()
-
- Log().quiet("Done")
-
-
-class CreateRevsPass(Pass):
- """Generate the SVNCommit <-> CVSRevision mapping databases.
-
- SVNCommitCreator also calls SymbolingsLogger to register
- CVSRevisions that represent an opening or closing for a path on a
- branch or tag. See SymbolingsLogger for more details.
-
- This pass was formerly known as pass5."""
-
- def register_artifacts(self):
- self._register_temp_file(config.SVN_COMMITS_INDEX_TABLE)
- self._register_temp_file(config.SVN_COMMITS_STORE)
- self._register_temp_file(config.CVS_REVS_TO_SVN_REVNUMS)
- self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS)
- self._register_temp_file_needed(config.PROJECTS)
- self._register_temp_file_needed(config.CVS_FILES_DB)
- self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
- self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
- self._register_temp_file_needed(config.SYMBOL_DB)
- self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
- self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
- self._register_temp_file_needed(config.CHANGESETS_SORTED_DATAFILE)
-
- def get_changesets(self):
- """Generate (changeset,timestamp,) tuples in commit order."""
-
- changeset_db = ChangesetDatabase(
- artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
- artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
- DB_OPEN_READ)
-
- for line in file(
- artifact_manager.get_temp_file(
- config.CHANGESETS_SORTED_DATAFILE)):
- [changeset_id, timestamp] = [int(s, 16) for s in line.strip().split()]
- yield (changeset_db[changeset_id], timestamp)
-
- changeset_db.close()
-
- def get_svn_commits(self, creator):
- """Generate the SVNCommits, in order."""
-
- for (changeset, timestamp) in self.get_changesets():
- for svn_commit in creator.process_changeset(changeset, timestamp):
- yield svn_commit
-
- def log_svn_commit(self, svn_commit):
- """Output information about SVN_COMMIT."""
-
- Log().normal(
- 'Creating Subversion r%d (%s)'
- % (svn_commit.revnum, svn_commit.get_description(),)
- )
-
- if isinstance(svn_commit, SVNRevisionCommit):
- for cvs_rev in svn_commit.cvs_revs:
- Log().verbose(' %s %s' % (cvs_rev.cvs_path, cvs_rev.rev,))
-
- def run(self, run_options, stats_keeper):
- Log().quiet("Mapping CVS revisions to Subversion commits...")
-
- Ctx()._projects = read_projects(
- artifact_manager.get_temp_file(config.PROJECTS)
- )
- Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
- Ctx()._symbol_db = SymbolDatabase()
- Ctx()._cvs_items_db = IndexedCVSItemStore(
- artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
- artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
- DB_OPEN_READ)
-
- Ctx()._symbolings_logger = SymbolingsLogger()
-
- persistence_manager = PersistenceManager(DB_OPEN_NEW)
-
- creator = SVNCommitCreator()
- for svn_commit in self.get_svn_commits(creator):
- self.log_svn_commit(svn_commit)
- persistence_manager.put_svn_commit(svn_commit)
-
- stats_keeper.set_svn_rev_count(creator.revnum_generator.get_last_id())
- del creator
-
- persistence_manager.close()
- Ctx()._symbolings_logger.close()
- Ctx()._cvs_items_db.close()
- Ctx()._symbol_db.close()
- Ctx()._cvs_file_db.close()
-
- Log().quiet("Done")
-
-
-class SortSymbolsPass(Pass):
- """This pass was formerly known as pass6."""
-
- def register_artifacts(self):
- self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
- self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS)
-
- def run(self, run_options, stats_keeper):
- Log().quiet("Sorting symbolic name source revisions...")
-
- sort_file(
- artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS),
- artifact_manager.get_temp_file(
- config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
- options=['-k', '1,1', '-k', '2,2n', '-k', '3'],
- )
- Log().quiet("Done")
-
-
-class IndexSymbolsPass(Pass):
- """This pass was formerly known as pass7."""
-
- def register_artifacts(self):
- self._register_temp_file(config.SYMBOL_OFFSETS_DB)
- self._register_temp_file_needed(config.PROJECTS)
- self._register_temp_file_needed(config.SYMBOL_DB)
- self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
-
- def generate_offsets_for_symbolings(self):
- """This function iterates through all the lines in
- SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
- SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
- where SYMBOLIC_NAME is first encountered. This will allow us to
- seek to the various offsets in the file and sequentially read only
- the openings and closings that we need."""
-
- offsets = {}
-
- f = open(
- artifact_manager.get_temp_file(
- config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
- 'r')
- old_id = None
- while True:
- fpos = f.tell()
- line = f.readline()
- if not line:
- break
- id, svn_revnum, ignored = line.split(" ", 2)
- id = int(id, 16)
- if id != old_id:
- Log().verbose(' ', Ctx()._symbol_db.get_symbol(id).name)
- old_id = id
- offsets[id] = fpos
-
- f.close()
-
- offsets_db = file(
- artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'wb')
- cPickle.dump(offsets, offsets_db, -1)
- offsets_db.close()
-
- def run(self, run_options, stats_keeper):
- Log().quiet("Determining offsets for all symbolic names...")
- Ctx()._projects = read_projects(
- artifact_manager.get_temp_file(config.PROJECTS)
- )
- Ctx()._symbol_db = SymbolDatabase()
- self.generate_offsets_for_symbolings()
- Ctx()._symbol_db.close()
- Log().quiet("Done.")
-
-
-class OutputPass(Pass):
- """This pass was formerly known as pass8."""
-
- def register_artifacts(self):
- self._register_temp_file_needed(config.PROJECTS)
- self._register_temp_file_needed(config.CVS_FILES_DB)
- self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
- self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
- self._register_temp_file_needed(config.SYMBOL_DB)
- self._register_temp_file_needed(config.METADATA_CLEAN_INDEX_TABLE)
- self._register_temp_file_needed(config.METADATA_CLEAN_STORE)
- self._register_temp_file_needed(config.SVN_COMMITS_INDEX_TABLE)
- self._register_temp_file_needed(config.SVN_COMMITS_STORE)
- self._register_temp_file_needed(config.CVS_REVS_TO_SVN_REVNUMS)
- Ctx().output_option.register_artifacts(self)
-
- def get_svn_commits(self):
- """Generate the SVNCommits in commit order."""
-
- persistence_manager = PersistenceManager(DB_OPEN_READ)
-
- svn_revnum = 1 # The first non-trivial commit
-
- # Peek at the first revision to find the date to use to initialize
- # the repository:
- svn_commit = persistence_manager.get_svn_commit(svn_revnum)
-
- while svn_commit:
- yield svn_commit
- svn_revnum += 1
- svn_commit = persistence_manager.get_svn_commit(svn_revnum)
-
- persistence_manager.close()
-
- def run(self, run_options, stats_keeper):
- Ctx()._projects = read_projects(
- artifact_manager.get_temp_file(config.PROJECTS)
- )
- Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
- Ctx()._metadata_db = MetadataDatabase(
- artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
- artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
- DB_OPEN_READ,
- )
- Ctx()._cvs_items_db = IndexedCVSItemStore(
- artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
- artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
- DB_OPEN_READ)
- Ctx()._symbol_db = SymbolDatabase()
-
- Ctx().output_option.setup(stats_keeper.svn_rev_count())
-
- for svn_commit in self.get_svn_commits():
- svn_commit.output(Ctx().output_option)
-
- Ctx().output_option.cleanup()
-
- Ctx()._symbol_db.close()
- Ctx()._cvs_items_db.close()
- Ctx()._metadata_db.close()
- Ctx()._cvs_file_db.close()
-
-
-# The list of passes constituting a run of cvs2svn:
-passes = [
- CollectRevsPass(),
- CleanMetadataPass(),
- CollateSymbolsPass(),
- #CheckItemStoreDependenciesPass(config.CVS_ITEMS_STORE),
- FilterSymbolsPass(),
- SortRevisionSummaryPass(),
- SortSymbolSummaryPass(),
- InitializeChangesetsPass(),
- #CheckIndexedItemStoreDependenciesPass(
- # config.CVS_ITEMS_SORTED_STORE,
- # config.CVS_ITEMS_SORTED_INDEX_TABLE),
- BreakRevisionChangesetCyclesPass(),
- RevisionTopologicalSortPass(),
- BreakSymbolChangesetCyclesPass(),
- BreakAllChangesetCyclesPass(),
- TopologicalSortPass(),
- CreateRevsPass(),
- SortSymbolsPass(),
- IndexSymbolsPass(),
- OutputPass(),
- ]
-
-
diff --git a/cvs2svn_lib/persistence_manager.py b/cvs2svn_lib/persistence_manager.py
deleted file mode 100644
index 8a622ab..0000000
--- a/cvs2svn_lib/persistence_manager.py
+++ /dev/null
@@ -1,106 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains class PersistenceManager."""
-
-
-from cvs2svn_lib import config
-from cvs2svn_lib.common import DB_OPEN_NEW
-from cvs2svn_lib.common import DB_OPEN_READ
-from cvs2svn_lib.common import SVN_INVALID_REVNUM
-from cvs2svn_lib.artifact_manager import artifact_manager
-from cvs2svn_lib.record_table import SignedIntegerPacker
-from cvs2svn_lib.record_table import RecordTable
-from cvs2svn_lib.serializer import PrimedPickleSerializer
-from cvs2svn_lib.database import IndexedDatabase
-from cvs2svn_lib.svn_commit import SVNRevisionCommit
-from cvs2svn_lib.svn_commit import SVNInitialProjectCommit
-from cvs2svn_lib.svn_commit import SVNPrimaryCommit
-from cvs2svn_lib.svn_commit import SVNBranchCommit
-from cvs2svn_lib.svn_commit import SVNTagCommit
-from cvs2svn_lib.svn_commit import SVNPostCommit
-
-
-class PersistenceManager:
- """The PersistenceManager allows us to effectively store SVNCommits
- to disk and retrieve them later using only their subversion revision
- number as the key. It also returns the subversion revision number
- for a given CVSRevision's unique key.
-
- All information pertinent to each SVNCommit is stored in a series of
- on-disk databases so that SVNCommits can be retrieved on-demand.
-
- MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.
- In 'new' mode, PersistenceManager will initialize a new set of on-disk
- databases and be fully-featured.
- In 'read' mode, PersistenceManager will open existing on-disk databases
- and the set_* methods will be unavailable."""
-
- def __init__(self, mode):
- self.mode = mode
- if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
- raise RuntimeError, "Invalid 'mode' argument to PersistenceManager"
- primer = (
- SVNInitialProjectCommit,
- SVNPrimaryCommit,
- SVNPostCommit,
- SVNBranchCommit,
- SVNTagCommit,
- )
- serializer = PrimedPickleSerializer(primer)
- self.svn_commit_db = IndexedDatabase(
- artifact_manager.get_temp_file(config.SVN_COMMITS_INDEX_TABLE),
- artifact_manager.get_temp_file(config.SVN_COMMITS_STORE),
- mode, serializer)
- self.cvs2svn_db = RecordTable(
- artifact_manager.get_temp_file(config.CVS_REVS_TO_SVN_REVNUMS),
- mode, SignedIntegerPacker(SVN_INVALID_REVNUM))
-
- def get_svn_revnum(self, cvs_rev_id):
- """Return the Subversion revision number in which CVS_REV_ID was
- committed, or SVN_INVALID_REVNUM if there is no mapping for
- CVS_REV_ID."""
-
- return self.cvs2svn_db.get(cvs_rev_id, SVN_INVALID_REVNUM)
-
- def get_svn_commit(self, svn_revnum):
- """Return an SVNCommit that corresponds to SVN_REVNUM.
-
- If no SVNCommit exists for revnum SVN_REVNUM, then return None."""
-
- return self.svn_commit_db.get(svn_revnum, None)
-
- def put_svn_commit(self, svn_commit):
- """Record the bidirectional mapping between SVN_REVNUM and
- CVS_REVS and record associated attributes."""
-
- if self.mode == DB_OPEN_READ:
- raise RuntimeError, \
- 'Write operation attempted on read-only PersistenceManager'
-
- self.svn_commit_db[svn_commit.revnum] = svn_commit
-
- if isinstance(svn_commit, SVNRevisionCommit):
- for cvs_rev in svn_commit.cvs_revs:
- self.cvs2svn_db[cvs_rev.id] = svn_commit.revnum
-
- def close(self):
- self.cvs2svn_db.close()
- self.cvs2svn_db = None
- self.svn_commit_db.close()
- self.svn_commit_db = None
-
-
diff --git a/cvs2svn_lib/process.py b/cvs2svn_lib/process.py
deleted file mode 100644
index 56469ce..0000000
--- a/cvs2svn_lib/process.py
+++ /dev/null
@@ -1,116 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains generic utilities used by cvs2svn."""
-
-
-import subprocess
-
-from cvs2svn_lib.common import FatalError
-from cvs2svn_lib.common import CommandError
-
-
-def call_command(command, **kw):
- """Call the specified command, checking that it exits successfully.
-
- Raise a FatalError if the command cannot be executed, or if it exits
- with a non-zero exit code. Pass KW as keyword arguments to
- subprocess.call()."""
-
- try:
- retcode = subprocess.call(command, **kw)
- if retcode < 0:
- raise FatalError(
- 'Command terminated by signal %d: "%s"'
- % (-retcode, ' '.join(command),)
- )
- elif retcode > 0:
- raise FatalError(
- 'Command failed with return code %d: "%s"'
- % (retcode, ' '.join(command),)
- )
- except OSError, e:
- raise FatalError(
- 'Command execution failed (%s): "%s"'
- % (e, ' '.join(command),)
- )
-
-
-class CommandFailedException(Exception):
- """Exception raised if check_command_runs() fails."""
-
- pass
-
-
-def check_command_runs(cmd, cmdname):
- """Check whether the command CMD can be executed without errors.
-
- CMD is a list or string, as accepted by subprocess.Popen(). CMDNAME
- is the name of the command as it should be included in exception
- error messages.
-
- This function checks three things: (1) the command can be run
- without throwing an OSError; (2) it exits with status=0; (3) it
- doesn't output anything to stderr. If any of these conditions is
- not met, raise a CommandFailedException describing the problem."""
-
- try:
- pipe = subprocess.Popen(
- cmd,
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- )
- except OSError, e:
- raise CommandFailedException('error executing %s: %s' % (cmdname, e,))
- pipe.stdin.close()
- pipe.stdout.read()
- errmsg = pipe.stderr.read()
- status = pipe.wait()
- if status or errmsg:
- msg = 'error executing %s: status %s' % (cmdname, status,)
- if errmsg:
- msg += ', error output:\n%s' % (errmsg,)
- raise CommandFailedException(msg)
-
-
-class PipeStream(object):
- """A file-like object from which revision contents can be read."""
-
- def __init__(self, pipe_command):
- self._pipe_command_str = ' '.join(pipe_command)
- self.pipe = subprocess.Popen(
- pipe_command,
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- )
- self.pipe.stdin.close()
-
- def read(self, size=None):
- if size is None:
- return self.pipe.stdout.read()
- else:
- return self.pipe.stdout.read(size)
-
- def close(self):
- self.pipe.stdout.close()
- error_output = self.pipe.stderr.read()
- exit_status = self.pipe.wait()
- if exit_status:
- raise CommandError(self._pipe_command_str, exit_status, error_output)
-
-
diff --git a/cvs2svn_lib/project.py b/cvs2svn_lib/project.py
deleted file mode 100644
index 0fe92df..0000000
--- a/cvs2svn_lib/project.py
+++ /dev/null
@@ -1,219 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains database facilities used by cvs2svn."""
-
-
-import re
-import os
-import cPickle
-
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.common import FatalError
-from cvs2svn_lib.common import IllegalSVNPathError
-from cvs2svn_lib.common import normalize_svn_path
-from cvs2svn_lib.common import verify_paths_disjoint
-from cvs2svn_lib.symbol_transform import CompoundSymbolTransform
-
-
-class FileInAndOutOfAtticException(Exception):
- def __init__(self, non_attic_path, attic_path):
- Exception.__init__(
- self,
- "A CVS repository cannot contain both %s and %s"
- % (non_attic_path, attic_path))
-
- self.non_attic_path = non_attic_path
- self.attic_path = attic_path
-
-
-def normalize_ttb_path(opt, path, allow_empty=False):
- try:
- return normalize_svn_path(path, allow_empty)
- except IllegalSVNPathError, e:
- raise FatalError('Problem with %s: %s' % (opt, e,))
-
-
-class Project(object):
- """A project within a CVS repository."""
-
- def __init__(
- self, id, project_cvs_repos_path,
- initial_directories=[],
- symbol_transforms=None,
- ):
- """Create a new Project record.
-
- ID is a unique id for this project. PROJECT_CVS_REPOS_PATH is the
- main CVS directory for this project (within the filesystem).
-
- INITIAL_DIRECTORIES is an iterable of all SVN directories that
- should be created when the project is first created. Normally,
- this should include the trunk, branches, and tags directory.
-
- SYMBOL_TRANSFORMS is an iterable of SymbolTransform instances
- which will be used to transform any symbol names within this
- project."""
-
- self.id = id
-
- self.project_cvs_repos_path = os.path.normpath(project_cvs_repos_path)
- if not os.path.isdir(self.project_cvs_repos_path):
- raise FatalError("The specified CVS repository path '%s' is not an "
- "existing directory." % self.project_cvs_repos_path)
-
- self.cvs_repository_root, self.cvs_module = \
- self.determine_repository_root(
- os.path.abspath(self.project_cvs_repos_path))
-
- # A regexp matching project_cvs_repos_path plus an optional separator:
- self.project_prefix_re = re.compile(
- r'^' + re.escape(self.project_cvs_repos_path)
- + r'(' + re.escape(os.sep) + r'|$)')
-
- # The SVN directories to add when the project is first created:
- self._initial_directories = []
-
- for path in initial_directories:
- try:
- path = normalize_svn_path(path, False)
- except IllegalSVNPathError, e:
- raise FatalError(
- 'Initial directory %r is not a legal SVN path: %s'
- % (path, e,)
- )
- self._initial_directories.append(path)
-
- verify_paths_disjoint(*self._initial_directories)
-
- # A list of transformation rules (regexp, replacement) applied to
- # symbol names in this project.
- if symbol_transforms is None:
- symbol_transforms = []
-
- self.symbol_transform = CompoundSymbolTransform(symbol_transforms)
-
- # The ID of the Trunk instance for this Project. This member is
- # filled in during CollectRevsPass.
- self.trunk_id = None
-
- # The ID of the CVSDirectory representing the root directory of
- # this project. This member is filled in during CollectRevsPass.
- self.root_cvs_directory_id = None
-
- def __eq__(self, other):
- return self.id == other.id
-
- def __cmp__(self, other):
- return cmp(self.cvs_module, other.cvs_module) \
- or cmp(self.id, other.id)
-
- def __hash__(self):
- return self.id
-
- @staticmethod
- def determine_repository_root(path):
- """Ascend above the specified PATH if necessary to find the
- cvs_repository_root (a directory containing a CVSROOT directory)
- and the cvs_module (the path of the conversion root within the cvs
- repository). Return the root path and the module path of this
- project relative to the root.
-
- NB: cvs_module must be seperated by '/', *not* by os.sep."""
-
- def is_cvs_repository_root(path):
- return os.path.isdir(os.path.join(path, 'CVSROOT'))
-
- original_path = path
- cvs_module = ''
- while not is_cvs_repository_root(path):
- # Step up one directory:
- prev_path = path
- path, module_component = os.path.split(path)
- if path == prev_path:
- # Hit the root (of the drive, on Windows) without finding a
- # CVSROOT dir.
- raise FatalError(
- "the path '%s' is not a CVS repository, nor a path "
- "within a CVS repository. A CVS repository contains "
- "a CVSROOT directory within its root directory."
- % (original_path,))
-
- cvs_module = module_component + "/" + cvs_module
-
- return path, cvs_module
-
- def transform_symbol(self, cvs_file, symbol_name, revision):
- """Transform the symbol SYMBOL_NAME.
-
- SYMBOL_NAME refers to revision number REVISION in CVS_FILE.
- REVISION is the CVS revision number as a string, with zeros
- removed (e.g., '1.7' or '1.7.2'). Use the renaming rules
- specified with --symbol-transform to possibly rename the symbol.
- Return the transformed symbol name, the original name if it should
- not be transformed, or None if the symbol should be omitted from
- the conversion."""
-
- return self.symbol_transform.transform(cvs_file, symbol_name, revision)
-
- def get_trunk(self):
- """Return the Trunk instance for this project.
-
- This method can only be called after self.trunk_id has been
- initialized in CollectRevsPass."""
-
- return Ctx()._symbol_db.get_symbol(self.trunk_id)
-
- def get_root_cvs_directory(self):
- """Return the root CVSDirectory instance for this project.
-
- This method can only be called after self.root_cvs_directory_id
- has been initialized in CollectRevsPass."""
-
- return Ctx()._cvs_file_db.get_file(self.root_cvs_directory_id)
-
- def get_initial_directories(self):
- """Generate the project's initial SVN directories.
-
- Yield as strings the SVN paths of directories that should be
- created when the project is first created."""
-
- # Yield the path of the Trunk symbol for this project (which might
- # differ from the one passed to the --trunk option because of
- # SymbolStrategyRules). The trunk path might be '' during a
- # trunk-only conversion, but that is OK because DumpfileDelegate
- # considers that directory to exist already and will therefore
- # ignore it:
- yield self.get_trunk().base_path
-
- for path in self._initial_directories:
- yield path
-
- def __str__(self):
- return self.project_cvs_repos_path
-
-
-def read_projects(filename):
- retval = {}
- for project in cPickle.load(open(filename, 'rb')):
- retval[project.id] = project
- return retval
-
-
-def write_projects(filename):
- cPickle.dump(Ctx()._projects.values(), open(filename, 'wb'), -1)
-
-
diff --git a/cvs2svn_lib/property_setters.py b/cvs2svn_lib/property_setters.py
deleted file mode 100644
index 7cf379e..0000000
--- a/cvs2svn_lib/property_setters.py
+++ /dev/null
@@ -1,385 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains classes to set Subversion properties on files."""
-
-
-import os
-import re
-import fnmatch
-import ConfigParser
-from cStringIO import StringIO
-
-from cvs2svn_lib.common import warning_prefix
-from cvs2svn_lib.log import Log
-
-
-class SVNPropertySetter:
- """Abstract class for objects that can set properties on a SVNCommitItem."""
-
- def set_properties(self, s_item):
- """Set any properties that can be determined for S_ITEM.
-
- S_ITEM is an instance of SVNCommitItem. This method should modify
- S_ITEM.svn_props in place."""
-
- raise NotImplementedError
-
-
-class CVSRevisionNumberSetter(SVNPropertySetter):
- """Set the cvs2svn:cvs-rev property to the CVS revision number."""
-
- propname = 'cvs2svn:cvs-rev'
-
- def set_properties(self, s_item):
- if self.propname in s_item.svn_props:
- return
-
- s_item.svn_props[self.propname] = s_item.cvs_rev.rev
- s_item.svn_props_changed = True
-
-
-class ExecutablePropertySetter(SVNPropertySetter):
- """Set the svn:executable property based on cvs_rev.cvs_file.executable."""
-
- propname = 'svn:executable'
-
- def set_properties(self, s_item):
- if self.propname in s_item.svn_props:
- return
-
- if s_item.cvs_rev.cvs_file.executable:
- s_item.svn_props[self.propname] = '*'
-
-
-class CVSBinaryFileEOLStyleSetter(SVNPropertySetter):
- """Set the eol-style to None for files with CVS mode '-kb'."""
-
- propname = 'svn:eol-style'
-
- def set_properties(self, s_item):
- if self.propname in s_item.svn_props:
- return
-
- if s_item.cvs_rev.cvs_file.mode == 'b':
- s_item.svn_props[self.propname] = None
-
-
-class MimeMapper(SVNPropertySetter):
- """A class that provides mappings from file names to MIME types."""
-
- propname = 'svn:mime-type'
-
- def __init__(self, mime_types_file):
- self.mappings = { }
-
- for line in file(mime_types_file):
- if line.startswith("#"):
- continue
-
- # format of a line is something like
- # text/plain c h cpp
- extensions = line.split()
- if len(extensions) < 2:
- continue
- type = extensions.pop(0)
- for ext in extensions:
- if ext in self.mappings and self.mappings[ext] != type:
- Log().error(
- "%s: ambiguous MIME mapping for *.%s (%s or %s)\n"
- % (warning_prefix, ext, self.mappings[ext], type)
- )
- self.mappings[ext] = type
-
- def set_properties(self, s_item):
- if self.propname in s_item.svn_props:
- return
-
- basename, extension = os.path.splitext(s_item.cvs_rev.cvs_file.basename)
-
- # Extension includes the dot, so strip it (will leave extension
- # empty if filename ends with a dot, which is ok):
- extension = extension[1:]
-
- # If there is no extension (or the file ends with a period), use
- # the base name for mapping. This allows us to set mappings for
- # files such as README or Makefile:
- if not extension:
- extension = basename
-
- mime_type = self.mappings.get(extension, None)
- if mime_type is not None:
- s_item.svn_props[self.propname] = mime_type
-
-
-class AutoPropsPropertySetter(SVNPropertySetter):
- """Set arbitrary svn properties based on an auto-props configuration.
-
- This class supports case-sensitive or case-insensitive pattern
- matching. The command-line default is case-insensitive behavior,
- consistent with Subversion (see
- http://subversion.tigris.org/issues/show_bug.cgi?id=2036).
-
- As a special extension to Subversion's auto-props handling, if a
- property name is preceded by a '!' then that property is forced to
- be left unset.
-
- If a property specified in auto-props has already been set to a
- different value, print a warning and leave the old property value
- unchanged.
-
- Python's treatment of whitespaces in the ConfigParser module is
- buggy and inconsistent. Usually spaces are preserved, but if there
- is at least one semicolon in the value, and the *first* semicolon is
- preceded by a space, then that is treated as the start of a comment
- and the rest of the line is silently discarded."""
-
- property_name_pattern = r'(?P<name>[^\!\=\s]+)'
- property_unset_re = re.compile(
- r'^\!\s*' + property_name_pattern + r'$'
- )
- property_set_re = re.compile(
- r'^' + property_name_pattern + r'\s*\=\s*(?P<value>.*)$'
- )
- property_novalue_re = re.compile(
- r'^' + property_name_pattern + r'$'
- )
-
- quoted_re = re.compile(
- r'^([\'\"]).*\1$'
- )
- comment_re = re.compile(r'\s;')
-
- class Pattern:
- """Describes the properties to be set for files matching a pattern."""
-
- def __init__(self, pattern, propdict):
- # A glob-like pattern:
- self.pattern = pattern
- # A dictionary of properties that should be set:
- self.propdict = propdict
-
- def match(self, basename):
- """Does the file with the specified basename match pattern?"""
-
- return fnmatch.fnmatch(basename, self.pattern)
-
- def __init__(self, configfilename, ignore_case=True):
- config = ConfigParser.ConfigParser()
- if ignore_case:
- self.transform_case = self.squash_case
- else:
- config.optionxform = self.preserve_case
- self.transform_case = self.preserve_case
-
- configtext = open(configfilename).read()
- if self.comment_re.search(configtext):
- Log().warn(
- '%s: Please be aware that a space followed by a\n'
- 'semicolon is sometimes treated as a comment in configuration\n'
- 'files. This pattern was seen in\n'
- ' %s\n'
- 'Please make sure that you have not inadvertently commented\n'
- 'out part of an important line.'
- % (warning_prefix, configfilename,)
- )
-
- config.readfp(StringIO(configtext), configfilename)
- self.patterns = []
- sections = config.sections()
- sections.sort()
- for section in sections:
- if self.transform_case(section) == 'auto-props':
- patterns = config.options(section)
- patterns.sort()
- for pattern in patterns:
- value = config.get(section, pattern)
- if value:
- self._add_pattern(pattern, value)
-
- def squash_case(self, s):
- return s.lower()
-
- def preserve_case(self, s):
- return s
-
- def _add_pattern(self, pattern, props):
- propdict = {}
- if self.quoted_re.match(pattern):
- Log().warn(
- '%s: Quoting is not supported in auto-props; please verify rule\n'
- 'for %r. (Using pattern including quotation marks.)\n'
- % (warning_prefix, pattern,)
- )
- for prop in props.split(';'):
- prop = prop.strip()
- m = self.property_unset_re.match(prop)
- if m:
- name = m.group('name')
- Log().debug(
- 'auto-props: For %r, leaving %r unset.' % (pattern, name,)
- )
- propdict[name] = None
- continue
-
- m = self.property_set_re.match(prop)
- if m:
- name = m.group('name')
- value = m.group('value')
- if self.quoted_re.match(value):
- Log().warn(
- '%s: Quoting is not supported in auto-props; please verify\n'
- 'rule %r for pattern %r. (Using value\n'
- 'including quotation marks.)\n'
- % (warning_prefix, prop, pattern,)
- )
- Log().debug(
- 'auto-props: For %r, setting %r to %r.' % (pattern, name, value,)
- )
- propdict[name] = value
- continue
-
- m = self.property_novalue_re.match(prop)
- if m:
- name = m.group('name')
- Log().debug(
- 'auto-props: For %r, setting %r to the empty string'
- % (pattern, name,)
- )
- propdict[name] = ''
- continue
-
- Log().warn(
- '%s: in auto-props line for %r, value %r cannot be parsed (ignored)'
- % (warning_prefix, pattern, prop,)
- )
-
- self.patterns.append(self.Pattern(self.transform_case(pattern), propdict))
-
- def get_propdict(self, cvs_file):
- basename = self.transform_case(cvs_file.basename)
- propdict = {}
- for pattern in self.patterns:
- if pattern.match(basename):
- for (key,value) in pattern.propdict.items():
- if key in propdict:
- if propdict[key] != value:
- Log().warn(
- "Contradictory values set for property '%s' for file %s."
- % (key, cvs_file,))
- else:
- propdict[key] = value
-
- return propdict
-
- def set_properties(self, s_item):
- propdict = self.get_propdict(s_item.cvs_rev.cvs_file)
- for (k,v) in propdict.items():
- if k in s_item.svn_props:
- if s_item.svn_props[k] != v:
- Log().warn(
- "Property '%s' already set to %r for file %s; "
- "auto-props value (%r) ignored."
- % (k, s_item.svn_props[k], s_item.cvs_rev.cvs_path, v,))
- else:
- s_item.svn_props[k] = v
-
-
-class CVSBinaryFileDefaultMimeTypeSetter(SVNPropertySetter):
- """If the file is binary and its svn:mime-type property is not yet
- set, set it to 'application/octet-stream'."""
-
- propname = 'svn:mime-type'
-
- def set_properties(self, s_item):
- if self.propname in s_item.svn_props:
- return
-
- if s_item.cvs_rev.cvs_file.mode == 'b':
- s_item.svn_props[self.propname] = 'application/octet-stream'
-
-
-class EOLStyleFromMimeTypeSetter(SVNPropertySetter):
- """Set svn:eol-style based on svn:mime-type.
-
- If svn:mime-type is known but svn:eol-style is not, then set
- svn:eol-style based on svn:mime-type as follows: if svn:mime-type
- starts with 'text/', then set svn:eol-style to native; otherwise,
- force it to remain unset. See also issue #39."""
-
- propname = 'svn:eol-style'
-
- def set_properties(self, s_item):
- if self.propname in s_item.svn_props:
- return
-
- if s_item.svn_props.get('svn:mime-type', None) is not None:
- if s_item.svn_props['svn:mime-type'].startswith("text/"):
- s_item.svn_props[self.propname] = 'native'
- else:
- s_item.svn_props[self.propname] = None
-
-
-class DefaultEOLStyleSetter(SVNPropertySetter):
- """Set the eol-style if one has not already been set."""
-
- propname = 'svn:eol-style'
-
- def __init__(self, value):
- """Initialize with the specified default VALUE."""
-
- self.value = value
-
- def set_properties(self, s_item):
- if self.propname in s_item.svn_props:
- return
-
- s_item.svn_props[self.propname] = self.value
-
-
-class SVNBinaryFileKeywordsPropertySetter(SVNPropertySetter):
- """Turn off svn:keywords for files with binary svn:eol-style."""
-
- propname = 'svn:keywords'
-
- def set_properties(self, s_item):
- if self.propname in s_item.svn_props:
- return
-
- if not s_item.svn_props.get('svn:eol-style'):
- s_item.svn_props[self.propname] = None
-
-
-class KeywordsPropertySetter(SVNPropertySetter):
- """If the svn:keywords property is not yet set, set it based on the
- file's mode. See issue #2."""
-
- propname = 'svn:keywords'
-
- def __init__(self, value):
- """Use VALUE for the value of the svn:keywords property if it is
- to be set."""
-
- self.value = value
-
- def set_properties(self, s_item):
- if self.propname in s_item.svn_props:
- return
-
- if s_item.cvs_rev.cvs_file.mode in [None, 'kv', 'kvl']:
- s_item.svn_props[self.propname] = self.value
-
-
diff --git a/cvs2svn_lib/rcs_revision_manager.py b/cvs2svn_lib/rcs_revision_manager.py
deleted file mode 100644
index 1c2dfcf..0000000
--- a/cvs2svn_lib/rcs_revision_manager.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""Access the CVS repository via RCS's 'co' command."""
-
-
-from cvs2svn_lib.common import FatalError
-from cvs2svn_lib.process import check_command_runs
-from cvs2svn_lib.process import PipeStream
-from cvs2svn_lib.process import CommandFailedException
-from cvs2svn_lib.revision_manager import RevisionReader
-
-
-class RCSRevisionReader(RevisionReader):
- """A RevisionReader that reads the contents via RCS."""
-
- def __init__(self, co_executable):
- self.co_executable = co_executable
- try:
- check_command_runs([self.co_executable, '-V'], self.co_executable)
- except CommandFailedException, e:
- raise FatalError('%s\n'
- 'Please check that co is installed and in your PATH\n'
- '(it is a part of the RCS software).' % (e,))
-
- def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False):
- pipe_cmd = [
- self.co_executable,
- '-q',
- '-x,v',
- '-p%s' % (cvs_rev.rev,)
- ]
- if suppress_keyword_substitution:
- pipe_cmd.append('-kk')
- pipe_cmd.append(cvs_rev.cvs_file.filename)
- return PipeStream(pipe_cmd)
-
-
diff --git a/cvs2svn_lib/rcs_stream.py b/cvs2svn_lib/rcs_stream.py
deleted file mode 100644
index b893819..0000000
--- a/cvs2svn_lib/rcs_stream.py
+++ /dev/null
@@ -1,149 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2007 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module processes RCS diffs (deltas)."""
-
-
-import re
-
-def msplit(s):
- """Split S into an array of lines.
-
- Only \n is a line separator. The line endings are part of the lines."""
-
- # return s.splitlines(True) clobbers \r
- re = [ i + "\n" for i in s.split("\n") ]
- re[-1] = re[-1][:-1]
- if not re[-1]:
- del re[-1]
- return re
-
-
-class MalformedDeltaException(Exception):
- """A malformed RCS delta was encountered."""
-
- pass
-
-class RCSStream:
- """This class represents a single file object to which RCS deltas can be
- applied in various ways."""
-
- ad_command = re.compile(r'^([ad])(\d+)\s(\d+)\n$')
- a_command = re.compile(r'^a(\d+)\s(\d+)\n$')
-
- def __init__(self, text):
- """Instantiate and initialize the file content with TEXT."""
-
- self._texts = msplit(text)
-
- def get_text(self):
- """Return the current file content."""
-
- return "".join(self._texts)
-
- def apply_diff(self, diff):
- """Apply the RCS diff DIFF to the current file content."""
-
- ntexts = []
- ooff = 0
- diffs = msplit(diff)
- i = 0
- while i < len(diffs):
- admatch = self.ad_command.match(diffs[i])
- if not admatch:
- raise MalformedDeltaException('Bad ed command')
- i += 1
- sl = int(admatch.group(2))
- cn = int(admatch.group(3))
- if admatch.group(1) == 'd': # "d" - Delete command
- sl -= 1
- if sl < ooff:
- raise MalformedDeltaException('Deletion before last edit')
- if sl > len(self._texts):
- raise MalformedDeltaException('Deletion past file end')
- if sl + cn > len(self._texts):
- raise MalformedDeltaException('Deletion beyond file end')
- ntexts += self._texts[ooff:sl]
- ooff = sl + cn
- else: # "a" - Add command
- if sl < ooff: # Also catches same place
- raise MalformedDeltaException('Insertion before last edit')
- if sl > len(self._texts):
- raise MalformedDeltaException('Insertion past file end')
- ntexts += self._texts[ooff:sl] + diffs[i:i + cn]
- ooff = sl
- i += cn
- self._texts = ntexts + self._texts[ooff:]
-
- def invert_diff(self, diff):
- """Apply the RCS diff DIFF to the current file content and simultaneously
- generate an RCS diff suitable for reverting the change."""
-
- ntexts = []
- ooff = 0
- diffs = msplit(diff)
- ndiffs = []
- adjust = 0
- i = 0
- while i < len(diffs):
- admatch = self.ad_command.match(diffs[i])
- if not admatch:
- raise MalformedDeltaException('Bad ed command')
- i += 1
- sl = int(admatch.group(2))
- cn = int(admatch.group(3))
- if admatch.group(1) == 'd': # "d" - Delete command
- sl -= 1
- if sl < ooff:
- raise MalformedDeltaException('Deletion before last edit')
- if sl > len(self._texts):
- raise MalformedDeltaException('Deletion past file end')
- if sl + cn > len(self._texts):
- raise MalformedDeltaException('Deletion beyond file end')
- # Handle substitution explicitly, as add must come after del
- # (last add may end in no newline, so no command can follow).
- if i < len(diffs):
- amatch = self.a_command.match(diffs[i])
- else:
- amatch = None
- if amatch and int(amatch.group(1)) == sl + cn:
- cn2 = int(amatch.group(2))
- i += 1
- ndiffs += ["d%d %d\na%d %d\n" % \
- (sl + 1 + adjust, cn2, sl + adjust + cn2, cn)] + \
- self._texts[sl:sl + cn]
- ntexts += self._texts[ooff:sl] + diffs[i:i + cn2]
- adjust += cn2 - cn
- i += cn2
- else:
- ndiffs += ["a%d %d\n" % (sl + adjust, cn)] + \
- self._texts[sl:sl + cn]
- ntexts += self._texts[ooff:sl]
- adjust -= cn
- ooff = sl + cn
- else: # "a" - Add command
- if sl < ooff: # Also catches same place
- raise MalformedDeltaException('Insertion before last edit')
- if sl > len(self._texts):
- raise MalformedDeltaException('Insertion past file end')
- ndiffs += ["d%d %d\n" % (sl + 1 + adjust, cn)]
- ntexts += self._texts[ooff:sl] + diffs[i:i + cn]
- ooff = sl
- adjust += cn
- i += cn
- self._texts = ntexts + self._texts[ooff:]
- return "".join(ndiffs)
-
diff --git a/cvs2svn_lib/record_table.py b/cvs2svn_lib/record_table.py
deleted file mode 100644
index 41ab84a..0000000
--- a/cvs2svn_lib/record_table.py
+++ /dev/null
@@ -1,399 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""Classes to manage Databases of fixed-length records.
-
-The databases map small, non-negative integers to fixed-size records.
-The records are written in index order to a disk file. Gaps in the
-index sequence leave gaps in the data file, so for best space
-efficiency the indexes of existing records should be approximately
-continuous.
-
-To use a RecordTable, you need a class derived from Packer which can
-serialize/deserialize your records into fixed-size strings. Deriving
-classes have to specify how to pack records into strings and unpack
-strings into records by overwriting the pack() and unpack() methods
-respectively.
-
-Note that these classes keep track of gaps in the records that have
-been written by filling them with packer.empty_value. If a record is
-read which contains packer.empty_value, then a KeyError is raised."""
-
-
-import os
-import types
-import struct
-import mmap
-
-from cvs2svn_lib.common import DB_OPEN_READ
-from cvs2svn_lib.common import DB_OPEN_WRITE
-from cvs2svn_lib.common import DB_OPEN_NEW
-from cvs2svn_lib.log import Log
-
-
-# A unique value that can be used to stand for "unset" without
-# preventing the use of None.
-_unset = object()
-
-
-class Packer(object):
- def __init__(self, record_len, empty_value=None):
- self.record_len = record_len
- if empty_value is None:
- self.empty_value = '\0' * self.record_len
- else:
- assert type(empty_value) is types.StringType
- assert len(empty_value) == self.record_len
- self.empty_value = empty_value
-
- def pack(self, v):
- """Pack record V into a string of length self.record_len."""
-
- raise NotImplementedError()
-
- def unpack(self, s):
- """Unpack string S into a record."""
-
- raise NotImplementedError()
-
-
-class StructPacker(Packer):
- def __init__(self, format, empty_value=_unset):
- self.format = format
- if empty_value is not _unset:
- empty_value = self.pack(empty_value)
- else:
- empty_value = None
-
- Packer.__init__(self, struct.calcsize(self.format),
- empty_value=empty_value)
-
- def pack(self, v):
- return struct.pack(self.format, v)
-
- def unpack(self, v):
- return struct.unpack(self.format, v)[0]
-
-
-class UnsignedIntegerPacker(StructPacker):
- def __init__(self, empty_value=0):
- StructPacker.__init__(self, '=I', empty_value)
-
-
-class SignedIntegerPacker(StructPacker):
- def __init__(self, empty_value=0):
- StructPacker.__init__(self, '=i', empty_value)
-
-
-class FileOffsetPacker(Packer):
- """A packer suitable for file offsets.
-
- We store the 5 least significant bytes of the file offset. This is
- enough bits to represent 1 TiB. Of course if the computer
- doesn't have large file support, only the lowest 31 bits can be
- nonzero, and the offsets are limited to 2 GiB."""
-
- # Convert file offsets to 8-bit little-endian unsigned longs...
- INDEX_FORMAT = '<Q'
- # ...but then truncate to 5 bytes.
- INDEX_FORMAT_LEN = 5
-
- PAD = '\0' * (struct.calcsize(INDEX_FORMAT) - INDEX_FORMAT_LEN)
-
- def __init__(self):
- Packer.__init__(self, self.INDEX_FORMAT_LEN)
-
- def pack(self, v):
- return struct.pack(self.INDEX_FORMAT, v)[:self.INDEX_FORMAT_LEN]
-
- def unpack(self, s):
- return struct.unpack(self.INDEX_FORMAT, s + self.PAD)[0]
-
-
-class RecordTableAccessError(RuntimeError):
- pass
-
-
-class AbstractRecordTable:
- def __init__(self, filename, mode, packer):
- self.filename = filename
- self.mode = mode
- self.packer = packer
- # Simplify and speed access to this oft-needed quantity:
- self._record_len = self.packer.record_len
-
- def __str__(self):
- return '%s(%r)' % (self.__class__.__name__, self.filename,)
-
- def _set_packed_record(self, i, s):
- """Set the value for index I to the packed value S."""
-
- raise NotImplementedError()
-
- def __setitem__(self, i, v):
- self._set_packed_record(i, self.packer.pack(v))
-
- def _get_packed_record(self, i):
- """Return the packed record for index I.
-
- Raise KeyError if it is not present."""
-
- raise NotImplementedError()
-
- def __getitem__(self, i):
- """Return the item for index I.
-
- Raise KeyError if that item has never been set (or if it was set
- to self.packer.empty_value)."""
-
- s = self._get_packed_record(i)
-
- if s == self.packer.empty_value:
- raise KeyError(i)
-
- return self.packer.unpack(s)
-
- def get_many(self, indexes, default=None):
- """Yield (index, item) typles for INDEXES in arbitrary order.
-
- Yield (index,default) for indices for which not item is defined."""
-
- indexes = list(indexes)
- # Sort the indexes to reduce disk seeking:
- indexes.sort()
- for i in indexes:
- yield (i, self.get(i, default))
-
- def get(self, i, default=None):
- try:
- return self[i]
- except KeyError:
- return default
-
- def __delitem__(self, i):
- """Delete the item for index I.
-
- Raise KeyError if that item has never been set (or if it was set
- to self.packer.empty_value)."""
-
- if self.mode == DB_OPEN_READ:
- raise RecordTableAccessError()
-
- # Check that the value was set (otherwise raise KeyError):
- self[i]
- self._set_packed_record(i, self.packer.empty_value)
-
- def iterkeys(self):
- """Yield the keys in the map in key order."""
-
- for i in xrange(0, self._limit):
- try:
- self[i]
- yield i
- except KeyError:
- pass
-
- def itervalues(self):
- """Yield the values in the map in key order.
-
- Skip over values that haven't been defined."""
-
- for i in xrange(0, self._limit):
- try:
- yield self[i]
- except KeyError:
- pass
-
-
-class RecordTable(AbstractRecordTable):
- # The approximate amount of memory that should be used for the cache
- # for each instance of this class:
- CACHE_MEMORY = 4 * 1024 * 1024
-
- # Empirically, each entry in the cache table has an overhead of
- # about 96 bytes on a 32-bit computer.
- CACHE_OVERHEAD_PER_ENTRY = 96
-
- def __init__(self, filename, mode, packer, cache_memory=CACHE_MEMORY):
- AbstractRecordTable.__init__(self, filename, mode, packer)
- if self.mode == DB_OPEN_NEW:
- self.f = open(self.filename, 'wb+')
- elif self.mode == DB_OPEN_WRITE:
- self.f = open(self.filename, 'rb+')
- elif self.mode == DB_OPEN_READ:
- self.f = open(self.filename, 'rb')
- else:
- raise RuntimeError('Invalid mode %r' % self.mode)
- self.cache_memory = cache_memory
-
- # Number of items that can be stored in the write cache.
- self._max_memory_cache = (
- self.cache_memory
- / (self.CACHE_OVERHEAD_PER_ENTRY + self._record_len))
-
- # Read and write cache; a map {i : (dirty, s)}, where i is an
- # index, dirty indicates whether the value has to be written to
- # disk, and s is the packed value for the index. Up to
- # self._max_memory_cache items can be stored here. When the cache
- # fills up, it is written to disk in one go and then cleared.
- self._cache = {}
-
- # The index just beyond the last record ever written:
- self._limit = os.path.getsize(self.filename) // self._record_len
-
- # The index just beyond the last record ever written to disk:
- self._limit_written = self._limit
-
- def flush(self):
- Log().debug('Flushing cache for %s' % (self,))
-
- pairs = [(i, s) for (i, (dirty, s)) in self._cache.items() if dirty]
-
- if pairs:
- pairs.sort()
- old_i = None
- f = self.f
- for (i, s) in pairs:
- if i == old_i:
- # No seeking needed
- pass
- elif i <= self._limit_written:
- # Just jump there:
- f.seek(i * self._record_len)
- else:
- # Jump to the end of the file then write _empty_values until
- # we reach the correct location:
- f.seek(self._limit_written * self._record_len)
- while self._limit_written < i:
- f.write(self.packer.empty_value)
- self._limit_written += 1
- f.write(s)
- old_i = i + 1
- self._limit_written = max(self._limit_written, old_i)
-
- self.f.flush()
-
- self._cache.clear()
-
- def _set_packed_record(self, i, s):
- if self.mode == DB_OPEN_READ:
- raise RecordTableAccessError()
- if i < 0:
- raise KeyError()
- self._cache[i] = (True, s)
- if len(self._cache) >= self._max_memory_cache:
- self.flush()
- self._limit = max(self._limit, i + 1)
-
- def _get_packed_record(self, i):
- try:
- return self._cache[i][1]
- except KeyError:
- if not 0 <= i < self._limit_written:
- raise KeyError(i)
- self.f.seek(i * self._record_len)
- s = self.f.read(self._record_len)
- self._cache[i] = (False, s)
- if len(self._cache) >= self._max_memory_cache:
- self.flush()
-
- return s
-
- def close(self):
- self.flush()
- self._cache = None
- self.f.close()
- self.f = None
-
-
-class MmapRecordTable(AbstractRecordTable):
- GROWTH_INCREMENT = 65536
-
- def __init__(self, filename, mode, packer):
- AbstractRecordTable.__init__(self, filename, mode, packer)
- if self.mode == DB_OPEN_NEW:
- self.python_file = open(self.filename, 'wb+')
- self.python_file.write('\0' * self.GROWTH_INCREMENT)
- self.python_file.flush()
- self._filesize = self.GROWTH_INCREMENT
- self.f = mmap.mmap(
- self.python_file.fileno(), self._filesize,
- access=mmap.ACCESS_WRITE
- )
-
- # The index just beyond the last record ever written:
- self._limit = 0
- elif self.mode == DB_OPEN_WRITE:
- self.python_file = open(self.filename, 'rb+')
- self._filesize = os.path.getsize(self.filename)
- self.f = mmap.mmap(
- self.python_file.fileno(), self._filesize,
- access=mmap.ACCESS_WRITE
- )
-
- # The index just beyond the last record ever written:
- self._limit = os.path.getsize(self.filename) // self._record_len
- elif self.mode == DB_OPEN_READ:
- self.python_file = open(self.filename, 'rb')
- self._filesize = os.path.getsize(self.filename)
- self.f = mmap.mmap(
- self.python_file.fileno(), self._filesize,
- access=mmap.ACCESS_READ
- )
-
- # The index just beyond the last record ever written:
- self._limit = os.path.getsize(self.filename) // self._record_len
- else:
- raise RuntimeError('Invalid mode %r' % self.mode)
-
- def flush(self):
- self.f.flush()
-
- def _set_packed_record(self, i, s):
- if self.mode == DB_OPEN_READ:
- raise RecordTableAccessError()
- if i < 0:
- raise KeyError()
- if i >= self._limit:
- # This write extends the range of valid indices. First check
- # whether the file has to be enlarged:
- new_size = (i + 1) * self._record_len
- if new_size > self._filesize:
- self._filesize = (
- (new_size + self.GROWTH_INCREMENT - 1)
- // self.GROWTH_INCREMENT
- * self.GROWTH_INCREMENT
- )
- self.f.resize(self._filesize)
- if i > self._limit:
- # Pad up to the new record with empty_value:
- self.f[self._limit * self._record_len:i * self._record_len] = \
- self.packer.empty_value * (i - self._limit)
- self._limit = i + 1
-
- self.f[i * self._record_len:(i + 1) * self._record_len] = s
-
- def _get_packed_record(self, i):
- if not 0 <= i < self._limit:
- raise KeyError(i)
- return self.f[i * self._record_len:(i + 1) * self._record_len]
-
- def close(self):
- self.flush()
- self.f.close()
- self.python_file.close()
-
-
diff --git a/cvs2svn_lib/repository_delegate.py b/cvs2svn_lib/repository_delegate.py
deleted file mode 100644
index 53c9b65..0000000
--- a/cvs2svn_lib/repository_delegate.py
+++ /dev/null
@@ -1,98 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains class RepositoryDelegate."""
-
-
-import os
-import subprocess
-
-from cvs2svn_lib.common import CommandError
-from cvs2svn_lib.common import FatalError
-from cvs2svn_lib.config import DUMPFILE
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.dumpfile_delegate import DumpfileDelegate
-
-
-class RepositoryDelegate(DumpfileDelegate):
- """Creates a new Subversion Repository. DumpfileDelegate does all
- of the heavy lifting."""
-
- def __init__(self, revision_reader, target):
- self.target = target
-
- # Since the output of this run is a repository, not a dumpfile,
- # the temporary dumpfiles we create should go in the tmpdir. But
- # since we delete it ourselves, we don't want to use
- # artifact_manager.
- DumpfileDelegate.__init__(
- self, revision_reader, Ctx().get_temp_filename(DUMPFILE)
- )
-
- self.dumpfile = open(self.dumpfile_path, 'w+b')
- self.loader_pipe = subprocess.Popen(
- [Ctx().svnadmin_executable, 'load', '-q', self.target],
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- )
- self.loader_pipe.stdout.close()
- try:
- self._write_dumpfile_header(self.loader_pipe.stdin)
- except IOError:
- raise FatalError(
- 'svnadmin failed with the following output while '
- 'loading the dumpfile:\n%s'
- % (self.loader_pipe.stderr.read(),)
- )
-
- def start_commit(self, revnum, revprops):
- """Start a new commit."""
-
- DumpfileDelegate.start_commit(self, revnum, revprops)
-
- def end_commit(self):
- """Feed the revision stored in the dumpfile to the svnadmin load pipe."""
-
- DumpfileDelegate.end_commit(self)
-
- self.dumpfile.seek(0)
- while True:
- data = self.dumpfile.read(128*1024) # Chunk size is arbitrary
- if not data:
- break
- try:
- self.loader_pipe.stdin.write(data)
- except IOError:
- raise FatalError("svnadmin failed with the following output "
- "while loading the dumpfile:\n"
- + self.loader_pipe.stderr.read())
- self.dumpfile.seek(0)
- self.dumpfile.truncate()
-
- def finish(self):
- """Clean up."""
-
- self.dumpfile.close()
- self.loader_pipe.stdin.close()
- error_output = self.loader_pipe.stderr.read()
- exit_status = self.loader_pipe.wait()
- del self.loader_pipe
- if exit_status:
- raise CommandError('svnadmin load', exit_status, error_output)
- os.remove(self.dumpfile_path)
-
-
diff --git a/cvs2svn_lib/repository_mirror.py b/cvs2svn_lib/repository_mirror.py
deleted file mode 100644
index 72e2ba1..0000000
--- a/cvs2svn_lib/repository_mirror.py
+++ /dev/null
@@ -1,897 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains the RepositoryMirror class and supporting classes.
-
-RepositoryMirror represents the skeleton of a versioned file tree with
-multiple lines of development ('LODs'). It records the presence or
-absence of files and directories, but not their contents. Given three
-values (revnum, lod, cvs_path), it can tell you whether the specified
-CVSPath existed on the specified LOD in the given revision number.
-The file trees corresponding to the most recent revision can be
-modified.
-
-The individual file trees are stored using immutable tree structures.
-Each directory node is represented as a MirrorDirectory instance,
-which is basically a map {cvs_path : node_id}, where cvs_path is a
-CVSPath within the directory, and node_id is an integer ID that
-uniquely identifies another directory node if that node is a
-CVSDirectory, or None if that node is a CVSFile. If a directory node
-is to be modified, then first a new node is created with a copy of the
-original node's contents, then the copy is modified. A reference to
-the copy also has to be stored in the parent node, meaning that the
-parent node needs to be modified, and so on recursively to the root
-node of the file tree. This data structure allows cheap deep copies,
-which is useful for tagging and branching.
-
-The class must also be able to find the root directory node
-corresponding to a particular (revnum, lod). This is done by keeping
-an LODHistory instance for each LOD, which can determine the root
-directory node ID for that LOD for any revnum. It does so by
-recording changes to the root directory node ID only for revisions in
-which it changed. Thus it stores two arrays, revnums (a list of the
-revision numbers when the ID changed), and ids (a list of the
-corresponding IDs). To find the ID for a particular revnum, first a
-binary search is done in the revnums array to find the index of the
-last change preceding revnum, then the corresponding ID is read from
-the ids array. Since most revisions change only one LOD, this allows
-storage of the history of potentially tens of thousands of LODs over
-hundreds of thousands of revisions in an amount of space that scales
-as O(numberOfLODs + numberOfRevisions), rather than O(numberOfLODs *
-numberOfRevisions) as would be needed if the information were stored
-in the equivalent of a 2D array.
-
-The internal operation of these classes is somewhat intricate, but the
-interface attempts to hide the complexity, enforce the usage rules,
-and allow efficient access. The most important facts to remember are
-(1) that a directory node can be used for multiple purposes (for
-multiple branches and for multiple revisions on a single branch), (2)
-that only a node that has been created within the current revision is
-allowed to be mutated, and (3) that the current revision can include
-nodes carried over from prior revisions, which are immutable.
-
-This leads to a bewildering variety of MirrorDirectory classes. The
-most important distinction is between OldMirrorDirectories and
-CurrentMirrorDirectories. A single node can be represented multiple
-ways in memory at the same time, depending on whether it was looked up
-as part of the current revision or part of an old revision:
-
- MirrorDirectory -- the base class for all MirrorDirectory nodes.
- This class allows lookup of subnodes and iteration over
- subnodes.
-
- OldMirrorDirectory -- a MirrorDirectory that was looked up for an
- old revision. These instances are immutable, as only the
- current revision is allowed to be modified.
-
- CurrentMirrorDirectory -- a MirrorDirectory that was looked up for
- the current revision. Such an instance is always logically
- mutable, though mutating it might require the node to be
- copied first. Such an instance might represent a node that
- has already been copied during this revision and can therefore
- be modified freely (such nodes implement
- _WritableMirrorDirectoryMixin), or it might represent a node
- that was carried over from an old revision and hasn't been
- copied yet (such nodes implement
- _ReadOnlyMirrorDirectoryMixin). If the latter, then the node
- copies itself (and bubbles up the change) before allowing
- itself to be modified. But the distinction is managed
- internally; client classes should not have to worry about it.
-
- CurrentMirrorLODDirectory -- A CurrentMirrorDirectory representing
- the root directory of a line of development in the current
- revision. This class has two concrete subclasses,
- _CurrentMirrorReadOnlyLODDirectory and
- _CurrentMirrorWritableLODDirectory, depending on whether the
- node has already been copied during this revision.
-
-
- CurrentMirrorSubdirectory -- A CurrentMirrorDirectory representing
- a subdirectory within a line of development's directory tree
- in the current revision. This class has two concrete
- subclasses, _CurrentMirrorReadOnlySubdirectory and
- _CurrentMirrorWritableSubdirectory, depending on whether the
- node has already been copied during this revision.
-
- DeletedCurrentMirrorDirectory -- a MirrorDirectory that has been
- deleted. Such an instance is disabled so that it cannot
- accidentally be used.
-
-While a revision is being processed, RepositoryMirror._new_nodes holds
-every writable CurrentMirrorDirectory instance (i.e., every node that
-has been created in the revision). Since these nodes are mutable, it
-is important that there be exactly one instance associated with each
-node; otherwise there would be problems keeping the instances
-synchronized. These are written to the database by
-RepositoryMirror.end_commit().
-
-OldMirrorDirectory and read-only CurrentMirrorDirectory instances are
-*not* cached; they are recreated whenever they are referenced. There
-might be multiple instances referring to the same node. A read-only
-CurrentMirrorDirectory instance is mutated in place into a writable
-CurrentMirrorDirectory instance if it needs to be modified.
-
-FIXME: The rules for when a MirrorDirectory instance can continue to
-be used vs. when it has to be read again (because it has been modified
-indirectly and therefore copied) are confusing and error-prone.
-Probably the semantics should be changed.
-
-"""
-
-
-import bisect
-
-from cvs2svn_lib import config
-from cvs2svn_lib.common import DB_OPEN_NEW
-from cvs2svn_lib.common import InternalError
-from cvs2svn_lib.log import Log
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.cvs_file import CVSFile
-from cvs2svn_lib.cvs_file import CVSDirectory
-from cvs2svn_lib.key_generator import KeyGenerator
-from cvs2svn_lib.artifact_manager import artifact_manager
-from cvs2svn_lib.serializer import MarshalSerializer
-from cvs2svn_lib.database import IndexedDatabase
-
-
-class RepositoryMirrorError(Exception):
- """An error related to the RepositoryMirror."""
-
- pass
-
-
-class LODExistsError(RepositoryMirrorError):
- """The LOD already exists in the repository.
-
- Exception raised if an attempt is made to add an LOD to the
- repository mirror and that LOD already exists in the youngest
- revision of the repository."""
-
- pass
-
-
-class PathExistsError(RepositoryMirrorError):
- """The path already exists in the repository.
-
- Exception raised if an attempt is made to add a path to the
- repository mirror and that path already exists in the youngest
- revision of the repository."""
-
- pass
-
-
-class DeletedNodeReusedError(RepositoryMirrorError):
- """The MirrorDirectory has already been deleted and shouldn't be reused."""
-
- pass
-
-
-class CopyFromCurrentNodeError(RepositoryMirrorError):
- """A CurrentMirrorDirectory cannot be copied to the current revision."""
-
- pass
-
-
-class MirrorDirectory(object):
- """Represent a node within the RepositoryMirror.
-
- Instances of this class act like a map {CVSPath : MirrorDirectory},
- where CVSPath is an item within this directory (i.e., a file or
- subdirectory within this directory). The value is either another
- MirrorDirectory instance (for directories) or None (for files)."""
-
- def __init__(self, repo, id, entries):
- # The RepositoryMirror containing this directory:
- self.repo = repo
-
- # The id of this node:
- self.id = id
-
- # The entries within this directory, stored as a map {CVSPath :
- # node_id}. The node_ids are integers for CVSDirectories, None
- # for CVSFiles:
- self._entries = entries
-
- def __getitem__(self, cvs_path):
- """Return the MirrorDirectory associated with the specified subnode.
-
- Return a MirrorDirectory instance if the subnode is a
- CVSDirectory; None if it is a CVSFile. Raise KeyError if the
- specified subnode does not exist."""
-
- raise NotImplementedError()
-
- def __len__(self):
- """Return the number of CVSPaths within this node."""
-
- return len(self._entries)
-
- def __contains__(self, cvs_path):
- """Return True iff CVS_PATH is contained in this node."""
-
- return cvs_path in self._entries
-
- def __iter__(self):
- """Iterate over the CVSPaths within this node."""
-
- return self._entries.__iter__()
-
- def _format_entries(self):
- """Format the entries map for output in subclasses' __repr__() methods."""
-
- def format_item(key, value):
- if value is None:
- return str(key)
- else:
- return '%s -> %x' % (key, value,)
-
- items = self._entries.items()
- items.sort()
- return '{%s}' % (', '.join([format_item(*item) for item in items]),)
-
- def __str__(self):
- """For convenience only. The format is subject to change at any time."""
-
- return '%s<%x>' % (self.__class__.__name__, self.id,)
-
-
-class OldMirrorDirectory(MirrorDirectory):
- """Represent a historical directory within the RepositoryMirror."""
-
- def __getitem__(self, cvs_path):
- id = self._entries[cvs_path]
- if id is None:
- # This represents a leaf node.
- return None
- else:
- return OldMirrorDirectory(self.repo, id, self.repo._node_db[id])
-
- def __repr__(self):
- """For convenience only. The format is subject to change at any time."""
-
- return '%s(%s)' % (self, self._format_entries(),)
-
-
-class CurrentMirrorDirectory(MirrorDirectory):
- """Represent a directory that currently exists in the RepositoryMirror."""
-
- def __init__(self, repo, id, lod, cvs_path, entries):
- MirrorDirectory.__init__(self, repo, id, entries)
- self.lod = lod
- self.cvs_path = cvs_path
-
- def __getitem__(self, cvs_path):
- id = self._entries[cvs_path]
- if id is None:
- # This represents a leaf node.
- return None
- else:
- try:
- return self.repo._new_nodes[id]
- except KeyError:
- return _CurrentMirrorReadOnlySubdirectory(
- self.repo, id, self.lod, cvs_path, self,
- self.repo._node_db[id]
- )
-
- def __setitem__(self, cvs_path, node):
- """Create or overwrite a subnode of this node.
-
- CVS_PATH is the path of the subnode. NODE will be the new value
- of the node; for CVSDirectories it should be a MirrorDirectory
- instance; for CVSFiles it should be None."""
-
- if isinstance(node, DeletedCurrentMirrorDirectory):
- raise DeletedNodeReusedError(
- '%r has already been deleted and should not be reused' % (node,)
- )
- elif isinstance(node, CurrentMirrorDirectory):
- raise CopyFromCurrentNodeError(
- '%r was created in the current node and cannot be copied' % (node,)
- )
- else:
- self._set_entry(cvs_path, node)
-
- def __delitem__(self, cvs_path):
- """Remove the subnode of this node at CVS_PATH.
-
- If the node does not exist, then raise a KeyError."""
-
- node = self[cvs_path]
- self._del_entry(cvs_path)
- if isinstance(node, _WritableMirrorDirectoryMixin):
- node._mark_deleted()
-
- def mkdir(self, cvs_directory):
- """Create an empty subdirectory of this node at CVS_PATH.
-
- Return the CurrentDirectory that was created."""
-
- assert isinstance(cvs_directory, CVSDirectory)
- if cvs_directory in self:
- raise PathExistsError(
- 'Attempt to create directory \'%s\' in %s in repository mirror '
- 'when it already exists.'
- % (cvs_directory, self.lod,)
- )
-
- new_node = _CurrentMirrorWritableSubdirectory(
- self.repo, self.repo._key_generator.gen_id(), self.lod, cvs_directory,
- self, {}
- )
- self._set_entry(cvs_directory, new_node)
- self.repo._new_nodes[new_node.id] = new_node
- return new_node
-
- def add_file(self, cvs_file):
- """Create a file within this node at CVS_FILE."""
-
- assert isinstance(cvs_file, CVSFile)
- if cvs_file in self:
- raise PathExistsError(
- 'Attempt to create file \'%s\' in %s in repository mirror '
- 'when it already exists.'
- % (cvs_file, self.lod,)
- )
-
- self._set_entry(cvs_file, None)
-
- def __repr__(self):
- """For convenience only. The format is subject to change at any time."""
-
- return '%s(%r, %r, %s)' % (
- self, self.lod, self.cvs_path, self._format_entries(),
- )
-
-
-class DeletedCurrentMirrorDirectory(object):
- """A MirrorDirectory that has been deleted.
-
- A MirrorDirectory that used to be a _WritableMirrorDirectoryMixin
- but then was deleted. Such instances are turned into this class so
- that nobody can accidentally mutate them again."""
-
- pass
-
-
-class _WritableMirrorDirectoryMixin:
- """Mixin for MirrorDirectories that are already writable.
-
- A MirrorDirectory is writable if it has already been recreated
- during the current revision."""
-
- def _set_entry(self, cvs_path, node):
- """Create or overwrite a subnode of this node, with no checks."""
-
- if node is None:
- self._entries[cvs_path] = None
- else:
- self._entries[cvs_path] = node.id
-
- def _del_entry(self, cvs_path):
- """Remove the subnode of this node at CVS_PATH, with no checks."""
-
- del self._entries[cvs_path]
-
- def _mark_deleted(self):
- """Mark this object and any writable descendants as being deleted."""
-
- self.__class__ = DeletedCurrentMirrorDirectory
-
- for (cvs_path, id) in self._entries.iteritems():
- if id in self.repo._new_nodes:
- node = self[cvs_path]
- if isinstance(node, _WritableMirrorDirectoryMixin):
- # Mark deleted and recurse:
- node._mark_deleted()
-
-
-class _ReadOnlyMirrorDirectoryMixin:
- """Mixin for a CurrentMirrorDirectory that hasn't yet been made writable."""
-
- def _make_writable(self):
- raise NotImplementedError()
-
- def _set_entry(self, cvs_path, node):
- """Create or overwrite a subnode of this node, with no checks."""
-
- self._make_writable()
- self._set_entry(cvs_path, node)
-
- def _del_entry(self, cvs_path):
- """Remove the subnode of this node at CVS_PATH, with no checks."""
-
- self._make_writable()
- self._del_entry(cvs_path)
-
-
-class CurrentMirrorLODDirectory(CurrentMirrorDirectory):
- """Represent an LOD's main directory in the mirror's current version."""
-
- def __init__(self, repo, id, lod, entries):
- CurrentMirrorDirectory.__init__(
- self, repo, id, lod, lod.project.get_root_cvs_directory(), entries
- )
-
- def delete(self):
- """Remove the directory represented by this object."""
-
- lod_history = self.repo._get_lod_history(self.lod)
- assert lod_history.exists()
- lod_history.update(self.repo._youngest, None)
- self._mark_deleted()
-
-
-class _CurrentMirrorReadOnlyLODDirectory(
- CurrentMirrorLODDirectory, _ReadOnlyMirrorDirectoryMixin
- ):
- """Represent an LOD's main directory in the mirror's current version."""
-
- def _make_writable(self):
- self.__class__ = _CurrentMirrorWritableLODDirectory
- # Create a new ID:
- self.id = self.repo._key_generator.gen_id()
- self.repo._new_nodes[self.id] = self
- self.repo._get_lod_history(self.lod).update(self.repo._youngest, self.id)
- self._entries = self._entries.copy()
-
-
-class _CurrentMirrorWritableLODDirectory(
- CurrentMirrorLODDirectory, _WritableMirrorDirectoryMixin
- ):
- pass
-
-
-class CurrentMirrorSubdirectory(CurrentMirrorDirectory):
- """Represent a subdirectory in the mirror's current version."""
-
- def __init__(self, repo, id, lod, cvs_path, parent_mirror_dir, entries):
- CurrentMirrorDirectory.__init__(self, repo, id, lod, cvs_path, entries)
- self.parent_mirror_dir = parent_mirror_dir
-
- def delete(self):
- """Remove the directory represented by this object."""
-
- del self.parent_mirror_dir[self.cvs_path]
-
-
-class _CurrentMirrorReadOnlySubdirectory(
- CurrentMirrorSubdirectory, _ReadOnlyMirrorDirectoryMixin
- ):
- """Represent a subdirectory in the mirror's current version."""
-
- def _make_writable(self):
- self.__class__ = _CurrentMirrorWritableSubdirectory
- # Create a new ID:
- self.id = self.repo._key_generator.gen_id()
- self.repo._new_nodes[self.id] = self
- self.parent_mirror_dir._set_entry(self.cvs_path, self)
- self._entries = self._entries.copy()
-
-
-class _CurrentMirrorWritableSubdirectory(
- CurrentMirrorSubdirectory, _WritableMirrorDirectoryMixin
- ):
- pass
-
-
-class LODHistory(object):
- """The history of root nodes for a line of development.
-
- Members:
-
- _mirror -- (RepositoryMirror) the RepositoryMirror that manages
- this LODHistory.
-
- lod -- (LineOfDevelopment) the LOD described by this LODHistory.
-
- revnums -- (list of int) the revision numbers in which the id
- changed, in numerical order.
-
- ids -- (list of (int or None)) the ID of the node describing the
- root of this LOD starting at the corresponding revision
- number, or None if the LOD did not exist in that revision.
-
- To find the root id for a given revision number, a binary search is
- done within REVNUMS to find the index of the most recent revision at
- the time of REVNUM, then that index is used to read the id out of
- IDS.
-
- A sentry is written at the zeroth index of both arrays to describe
- the initial situation, namely, that the LOD doesn't exist in
- revision r0."""
-
- __slots__ = ['_mirror', 'lod', 'revnums', 'ids']
-
- def __init__(self, mirror, lod):
- self._mirror = mirror
- self.lod = lod
- self.revnums = [0]
- self.ids = [None]
-
- def get_id(self, revnum):
- """Get the ID of the root path for this LOD in REVNUM.
-
- Raise KeyError if this LOD didn't exist in REVNUM."""
-
- index = bisect.bisect_right(self.revnums, revnum) - 1
- id = self.ids[index]
-
- if id is None:
- raise KeyError()
-
- return id
-
- def get_current_id(self):
- """Get the ID of the root path for this LOD in the current revision.
-
- Raise KeyError if this LOD doesn't currently exist."""
-
- id = self.ids[-1]
-
- if id is None:
- raise KeyError()
-
- return id
-
- def exists(self):
- """Return True iff LOD exists in the current revision."""
-
- return self.ids[-1] is not None
-
- def update(self, revnum, id):
- """Indicate that the root node of this LOD changed to ID at REVNUM.
-
- REVNUM is a revision number that must be the same as that of the
- previous recorded change (in which case the previous change is
- overwritten) or later (in which the new change is appended).
-
- ID can be a node ID, or it can be None to indicate that this LOD
- ceased to exist in REVNUM."""
-
- if revnum < self.revnums[-1]:
- raise KeyError()
- elif revnum == self.revnums[-1]:
- # This is an attempt to overwrite an entry that was already
- # updated during this revision. Don't allow the replacement
- # None -> None or allow one new id to be replaced with another:
- old_id = self.ids[-1]
- if old_id is None and id is None:
- raise InternalError(
- 'ID changed from None -> None for %s, r%d' % (self.lod, revnum,)
- )
- elif (old_id is not None and id is not None
- and old_id in self._mirror._new_nodes):
- raise InternalError(
- 'ID changed from %x -> %x for %s, r%d'
- % (old_id, id, self.lod, revnum,)
- )
- self.ids[-1] = id
- else:
- self.revnums.append(revnum)
- self.ids.append(id)
-
-
-class _NodeDatabase(object):
- """A database storing all of the directory nodes.
-
- The nodes are written in groups every time write_new_nodes() is
- called. To the database is written a dictionary {node_id :
- [(cvs_path.id, node_id),...]}, where the keys are the node_ids of
- the new nodes. When a node is read, its whole group is read and
- cached under the assumption that the other nodes in the group are
- likely to be needed soon. The cache is retained across revisions
- and cleared when _cache_max_size is exceeded.
-
- The dictionaries for nodes that have been read from the database
- during the current revision are cached by node_id in the _cache
- member variable. The corresponding dictionaries are *not* copied
- when read. To avoid cross-talk between distinct MirrorDirectory
- instances that have the same node_id, users of these dictionaries
- have to copy them before modification."""
-
- # How many entries should be allowed in the cache for each
- # CVSDirectory in the repository. (This number is very roughly the
- # number of complete lines of development that can be stored in the
- # cache at one time.)
- CACHE_SIZE_MULTIPLIER = 5
-
- # But the cache will never be limited to less than this number:
- MIN_CACHE_LIMIT = 5000
-
- def __init__(self):
- self.cvs_file_db = Ctx()._cvs_file_db
- self.db = IndexedDatabase(
- artifact_manager.get_temp_file(config.MIRROR_NODES_STORE),
- artifact_manager.get_temp_file(config.MIRROR_NODES_INDEX_TABLE),
- DB_OPEN_NEW, serializer=MarshalSerializer(),
- )
-
- # A list of the maximum node_id stored by each call to
- # write_new_nodes():
- self._max_node_ids = [0]
-
- # A map {node_id : {cvs_path : node_id}}:
- self._cache = {}
-
- # The number of directories in the repository:
- num_dirs = len([
- cvs_path
- for cvs_path in self.cvs_file_db.itervalues()
- if isinstance(cvs_path, CVSDirectory)
- ])
-
- self._cache_max_size = max(
- int(self.CACHE_SIZE_MULTIPLIER * num_dirs),
- self.MIN_CACHE_LIMIT,
- )
-
- def _load(self, items):
- retval = {}
- for (id, value) in items:
- retval[self.cvs_file_db.get_file(id)] = value
- return retval
-
- def _dump(self, node):
- return [
- (cvs_path.id, value)
- for (cvs_path, value) in node.iteritems()
- ]
-
- def _determine_index(self, id):
- """Return the index of the record holding the node with ID."""
-
- return bisect.bisect_left(self._max_node_ids, id)
-
- def __getitem__(self, id):
- try:
- items = self._cache[id]
- except KeyError:
- index = self._determine_index(id)
- for (node_id, items) in self.db[index].items():
- self._cache[node_id] = self._load(items)
- items = self._cache[id]
-
- return items
-
- def write_new_nodes(self, nodes):
- """Write NODES to the database.
-
- NODES is an iterable of writable CurrentMirrorDirectory instances."""
-
- if len(self._cache) > self._cache_max_size:
- # The size of the cache has exceeded the threshold. Discard the
- # old cache values (but still store the new nodes into the
- # cache):
- Log().debug('Clearing node cache')
- self._cache.clear()
-
- data = {}
- max_node_id = 0
- for node in nodes:
- max_node_id = max(max_node_id, node.id)
- data[node.id] = self._dump(node._entries)
- self._cache[node.id] = node._entries
-
- self.db[len(self._max_node_ids)] = data
-
- if max_node_id == 0:
- # Rewrite last value:
- self._max_node_ids.append(self._max_node_ids[-1])
- else:
- self._max_node_ids.append(max_node_id)
-
- def close(self):
- self._cache.clear()
- self.db.close()
- self.db = None
-
-
-class RepositoryMirror:
- """Mirror a repository and its history.
-
- Mirror a repository as it is constructed, one revision at a time.
- For each LineOfDevelopment we store a skeleton of the directory
- structure within that LOD for each revnum in which it changed.
-
- For each LOD that has been seen so far, an LODHistory instance is
- stored in self._lod_histories. An LODHistory keeps track of each
- revnum in which files were added to or deleted from that LOD, as
- well as the node id of the root of the node tree describing the LOD
- contents at that revision.
-
- The LOD trees themselves are stored in the _node_db database, which
- maps node ids to nodes. A node is a map from CVSPath to ids of the
- corresponding subnodes. The _node_db is stored on disk and each
- access is expensive.
-
- The _node_db database only holds the nodes for old revisions. The
- revision that is being constructed is kept in memory in the
- _new_nodes map, which is cheap to access.
-
- You must invoke start_commit() before each commit and end_commit()
- afterwards."""
-
- def register_artifacts(self, which_pass):
- """Register the artifacts that will be needed for this object."""
-
- artifact_manager.register_temp_file(
- config.MIRROR_NODES_INDEX_TABLE, which_pass
- )
- artifact_manager.register_temp_file(
- config.MIRROR_NODES_STORE, which_pass
- )
-
- def open(self):
- """Set up the RepositoryMirror and prepare it for commits."""
-
- self._key_generator = KeyGenerator()
-
- # A map from LOD to LODHistory instance for all LODs that have
- # been referenced so far:
- self._lod_histories = {}
-
- # This corresponds to the 'nodes' table in a Subversion fs. (We
- # don't need a 'representations' or 'strings' table because we
- # only track file existence, not file contents.)
- self._node_db = _NodeDatabase()
-
- # Start at revision 0 without a root node.
- self._youngest = 0
-
- def start_commit(self, revnum):
- """Start a new commit."""
-
- assert revnum > self._youngest
- self._youngest = revnum
-
- # A map {node_id : _WritableMirrorDirectoryMixin}.
- self._new_nodes = {}
-
- def end_commit(self):
- """Called at the end of each commit.
-
- This method copies the newly created nodes to the on-disk nodes
- db."""
-
- # Copy the new nodes to the _node_db
- self._node_db.write_new_nodes([
- node
- for node in self._new_nodes.values()
- if not isinstance(node, DeletedCurrentMirrorDirectory)
- ])
-
- del self._new_nodes
-
- def _get_lod_history(self, lod):
- """Return the LODHistory instance describing LOD.
-
- Create a new (empty) LODHistory if it doesn't yet exist."""
-
- try:
- return self._lod_histories[lod]
- except KeyError:
- lod_history = LODHistory(self, lod)
- self._lod_histories[lod] = lod_history
- return lod_history
-
- def get_old_lod_directory(self, lod, revnum):
- """Return the directory for the root path of LOD at revision REVNUM.
-
- Return an instance of MirrorDirectory if the path exists;
- otherwise, raise KeyError."""
-
- lod_history = self._get_lod_history(lod)
- id = lod_history.get_id(revnum)
- return OldMirrorDirectory(self, id, self._node_db[id])
-
- def get_old_path(self, cvs_path, lod, revnum):
- """Return the node for CVS_PATH from LOD at REVNUM.
-
- If CVS_PATH is a CVSDirectory, then return an instance of
- OldMirrorDirectory. If CVS_PATH is a CVSFile, return None.
-
- If CVS_PATH does not exist in the specified LOD and REVNUM, raise
- KeyError."""
-
- node = self.get_old_lod_directory(lod, revnum)
-
- for sub_path in cvs_path.get_ancestry()[1:]:
- node = node[sub_path]
-
- return node
-
- def get_current_lod_directory(self, lod):
- """Return the directory for the root path of LOD in the current revision.
-
- Return an instance of CurrentMirrorDirectory. Raise KeyError if
- the path doesn't already exist."""
-
- lod_history = self._get_lod_history(lod)
- id = lod_history.get_current_id()
- try:
- return self._new_nodes[id]
- except KeyError:
- return _CurrentMirrorReadOnlyLODDirectory(
- self, id, lod, self._node_db[id]
- )
-
- def get_current_path(self, cvs_path, lod):
- """Return the node for CVS_PATH from LOD in the current revision.
-
- If CVS_PATH is a CVSDirectory, then return an instance of
- CurrentMirrorDirectory. If CVS_PATH is a CVSFile, return None.
-
- If CVS_PATH does not exist in the current revision of the
- specified LOD, raise KeyError."""
-
- node = self.get_current_lod_directory(lod)
-
- for sub_path in cvs_path.get_ancestry()[1:]:
- node = node[sub_path]
-
- return node
-
- def add_lod(self, lod):
- """Create a new LOD in this repository.
-
- Return the CurrentMirrorDirectory that was created. If the LOD
- already exists, raise LODExistsError."""
-
- lod_history = self._get_lod_history(lod)
- if lod_history.exists():
- raise LODExistsError(
- 'Attempt to create %s in repository mirror when it already exists.'
- % (lod,)
- )
- new_node = _CurrentMirrorWritableLODDirectory(
- self, self._key_generator.gen_id(), lod, {}
- )
- lod_history.update(self._youngest, new_node.id)
- self._new_nodes[new_node.id] = new_node
- return new_node
-
- def copy_lod(self, src_lod, dest_lod, src_revnum):
- """Copy all of SRC_LOD at SRC_REVNUM to DST_LOD.
-
- In the youngest revision of the repository, the destination LOD
- *must not* already exist.
-
- Return the new node at DEST_LOD, as a CurrentMirrorDirectory."""
-
- # Get the node of our src_path
- src_node = self.get_old_lod_directory(src_lod, src_revnum)
-
- dest_lod_history = self._get_lod_history(dest_lod)
- if dest_lod_history.exists():
- raise LODExistsError(
- 'Attempt to copy to %s in repository mirror when it already exists.'
- % (dest_lod,)
- )
-
- dest_lod_history.update(self._youngest, src_node.id)
-
- # Return src_node, except packaged up as a CurrentMirrorDirectory:
- return self.get_current_lod_directory(dest_lod)
-
- def close(self):
- """Free resources and close databases."""
-
- self._lod_histories = None
- self._node_db.close()
- self._node_db = None
-
-
diff --git a/cvs2svn_lib/revision_manager.py b/cvs2svn_lib/revision_manager.py
deleted file mode 100644
index 8af7c74..0000000
--- a/cvs2svn_lib/revision_manager.py
+++ /dev/null
@@ -1,189 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module describes the interface to the CVS repository."""
-
-
-class RevisionRecorder:
- """An object that can record text and deltas from CVS files."""
-
- def __init__(self):
- """Initialize the RevisionRecorder.
-
- Please note that a RevisionRecorder is instantiated in every
- program run, even if the data-collection pass will not be
- executed. (This is to allow it to register the artifacts that it
- produces.) Therefore, the __init__() method should not do much,
- and more substantial preparation for use (like actually creating
- the artifacts) should be done in start()."""
-
- pass
-
- def register_artifacts(self, which_pass):
- """Register artifacts that will be needed during data recording.
-
- WHICH_PASS is the pass that will call our callbacks, so it should
- be used to do the registering (e.g., call
- WHICH_PASS.register_temp_file() and/or
- WHICH_PASS.register_temp_file_needed())."""
-
- pass
-
- def start(self):
- """Data will soon start being collected.
-
- Any non-idempotent initialization should be done here."""
-
- pass
-
- def start_file(self, cvs_file_items):
- """Prepare to receive data for the file with the specified CVS_FILE_ITEMS.
-
- CVS_FILE_ITEMS is an instance of CVSFileItems describing the file
- dependency topology right after the file tree was parsed out of
- the RCS file. (I.e., it reflects the original CVS dependency
- structure.) Please note that the CVSFileItems instance will be
- changed later."""
-
- pass
-
- def record_text(self, cvs_rev, log, text):
- """Record information about a revision and optionally return a token.
-
- CVS_REV is a CVSRevision instance describing a revision that has
- log message LOG and text TEXT (as retrieved from the RCS file).
- (TEXT is full text for the HEAD revision, and deltas for other
- revisions.)"""
-
- raise NotImplementedError()
-
- def finish_file(self, cvs_file_items):
- """The current file is finished; finish and clean up.
-
- CVS_FILE_ITEMS is a CVSFileItems instance describing the file's
- items at the end of processing of the RCS file in CollectRevsPass.
- It may be modified relative to the CVS_FILE_ITEMS instance passed
- to the corresponding start_file() call (revisions might be
- deleted, topology changed, etc)."""
-
- pass
-
- def finish(self):
- """All recording is done; clean up."""
-
- pass
-
-
-class NullRevisionRecorder(RevisionRecorder):
- """A do-nothing variety of RevisionRecorder."""
-
- def record_text(self, cvs_rev, log, text):
- return None
-
-
-class RevisionExcluder:
- """An interface for informing a RevisionReader about excluded revisions.
-
- Currently, revisions can be excluded via the --exclude option and
- various fixups for CVS peculiarities. This interface can be used to
- inform the associated RevisionReader about CVSItems that are being
- excluded. (The recorder might use that information to free some
- temporary data or adjust its expectations about which revisions will
- later be read.)"""
-
- def __init__(self):
- """Initialize the RevisionExcluder.
-
- Please note that a RevisionExcluder is instantiated in every
- program run, even if the branch-exclusion pass will not be
- executed. (This is to allow its register_artifacts() method to be
- called.) Therefore, the __init__() method should not do much, and
- more substantial preparation for use (like actually creating the
- artifacts) should be done in start()."""
-
- pass
-
- def register_artifacts(self, which_pass):
- """Register artifacts that will be needed during branch exclusion.
-
- WHICH_PASS is the pass that will call our callbacks, so it should
- be used to do the registering (e.g., call
- WHICH_PASS.register_temp_file() and/or
- WHICH_PASS.register_temp_file_needed())."""
-
- pass
-
- def start(self):
- """Prepare to handle branch exclusions."""
-
- pass
-
- def process_file(self, cvs_file_items):
- """Called for files whose trees were modified in FilterSymbolsPass.
-
- This callback is called once for each CVSFile whose topology was
- modified in FilterSymbolsPass."""
-
- raise NotImplementedError()
-
- def finish(self):
- """Called after all branch exclusions for all files are done."""
-
- pass
-
-
-class NullRevisionExcluder(RevisionExcluder):
- """A do-nothing variety of RevisionExcluder."""
-
- def process_file(self, cvs_file_items):
- pass
-
-
-class RevisionReader(object):
- """An object that can read the contents of CVSRevisions."""
-
- def register_artifacts(self, which_pass):
- """Register artifacts that will be needed during branch exclusion.
-
- WHICH_PASS is the pass that will call our callbacks, so it should
- be used to do the registering (e.g., call
- WHICH_PASS.register_temp_file() and/or
- WHICH_PASS.register_temp_file_needed())."""
-
- pass
-
- def start(self):
- """Prepare for calls to get_content_stream."""
-
- pass
-
- def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False):
- """Return a file-like object from which the contents of CVS_REV
- can be read.
-
- CVS_REV is a CVSRevision. If SUPPRESS_KEYWORD_SUBSTITUTION is
- True, then suppress the substitution of RCS/CVS keywords in the
- output."""
-
- raise NotImplementedError
-
- def finish(self):
- """Inform the reader that all calls to get_content_stream are done.
- Start may be called again at a later point."""
-
- pass
-
-
diff --git a/cvs2svn_lib/run_options.py b/cvs2svn_lib/run_options.py
deleted file mode 100644
index 27d2ea6..0000000
--- a/cvs2svn_lib/run_options.py
+++ /dev/null
@@ -1,1035 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains classes to set common cvs2xxx run options."""
-
-import sys
-import re
-import optparse
-from optparse import OptionGroup
-import time
-
-from cvs2svn_lib.version import VERSION
-from cvs2svn_lib import config
-from cvs2svn_lib.common import warning_prefix
-from cvs2svn_lib.common import error_prefix
-from cvs2svn_lib.common import FatalError
-from cvs2svn_lib.common import CVSTextDecoder
-from cvs2svn_lib.log import Log
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.man_writer import ManOption
-from cvs2svn_lib.pass_manager import InvalidPassError
-from cvs2svn_lib.symbol_strategy import AllBranchRule
-from cvs2svn_lib.symbol_strategy import AllTagRule
-from cvs2svn_lib.symbol_strategy import BranchIfCommitsRule
-from cvs2svn_lib.symbol_strategy import ExcludeRegexpStrategyRule
-from cvs2svn_lib.symbol_strategy import ForceBranchRegexpStrategyRule
-from cvs2svn_lib.symbol_strategy import ForceTagRegexpStrategyRule
-from cvs2svn_lib.symbol_strategy import ExcludeTrivialImportBranchRule
-from cvs2svn_lib.symbol_strategy import HeuristicStrategyRule
-from cvs2svn_lib.symbol_strategy import UnambiguousUsageRule
-from cvs2svn_lib.symbol_strategy import HeuristicPreferredParentRule
-from cvs2svn_lib.symbol_strategy import SymbolHintsFileRule
-from cvs2svn_lib.symbol_transform import ReplaceSubstringsSymbolTransform
-from cvs2svn_lib.symbol_transform import RegexpSymbolTransform
-from cvs2svn_lib.symbol_transform import NormalizePathsSymbolTransform
-from cvs2svn_lib.property_setters import AutoPropsPropertySetter
-from cvs2svn_lib.property_setters import CVSBinaryFileDefaultMimeTypeSetter
-from cvs2svn_lib.property_setters import CVSBinaryFileEOLStyleSetter
-from cvs2svn_lib.property_setters import CVSRevisionNumberSetter
-from cvs2svn_lib.property_setters import DefaultEOLStyleSetter
-from cvs2svn_lib.property_setters import EOLStyleFromMimeTypeSetter
-from cvs2svn_lib.property_setters import ExecutablePropertySetter
-from cvs2svn_lib.property_setters import KeywordsPropertySetter
-from cvs2svn_lib.property_setters import MimeMapper
-from cvs2svn_lib.property_setters import SVNBinaryFileKeywordsPropertySetter
-
-
-usage = """\
-Usage: %prog --options OPTIONFILE
- %prog [OPTION...] OUTPUT-OPTION CVS-REPOS-PATH"""
-
-description="""\
-Convert a CVS repository into a Subversion repository, including history.
-"""
-
-authors = u"""\
-Main authors are:
-.br
-C. Michael Pilato <cmpilato@collab.net>
-.br
-Greg Stein <gstein@lyra.org>
-.br
-Branko \u010cibej <brane@xbc.nu>
-.br
-Blair Zajac <blair@orcaware.com>
-.br
-Max Bowsher <maxb@ukf.net>
-.br
-Brian Fitzpatrick <fitz@red-bean.com>
-.br
-Tobias Ringstr\u00f6m <tobias@ringstrom.mine.nu>
-.br
-Karl Fogel <kfogel@collab.net>
-.br
-Erik H\u00fclsmann <e.huelsmann@gmx.net>
-.br
-David Summers <david@summersoft.fay.ar.us>
-.br
-Michael Haggerty <mhagger@alum.mit.edu>
-.PP
-Manpage was written for the Debian GNU/Linux system by
-Laszlo 'GCS' Boszormenyi <gcs@lsc.hu> (but may be used by others).
-"""
-
-
-class IncompatibleOption(ManOption):
- """A ManOption that is incompatible with the --options option.
-
- Record that the option was used so that error checking can later be
- done."""
-
- def __init__(self, *args, **kw):
- ManOption.__init__(self, *args, **kw)
-
- def take_action(self, action, dest, opt, value, values, parser):
- oio = parser.values.options_incompatible_options
- if opt not in oio:
- oio.append(opt)
- return ManOption.take_action(
- self, action, dest, opt, value, values, parser
- )
-
-
-class ContextOption(ManOption):
- """A ManOption that stores its value to Ctx."""
-
- def __init__(self, *args, **kw):
- if kw.get('action') not in self.STORE_ACTIONS:
- raise ValueError('Invalid action: %s' % (kw['action'],))
-
- self.__compatible_with_option = kw.pop('compatible_with_option', False)
- self.__action = kw.pop('action')
- try:
- self.__dest = kw.pop('dest')
- except KeyError:
- opt = args[0]
- if not opt.startswith('--'):
- raise ValueError
- self.__dest = opt[2:].replace('-', '_')
- if 'const' in kw:
- self.__const = kw.pop('const')
-
- kw['action'] = 'callback'
- kw['callback'] = self.__callback
-
- ManOption.__init__(self, *args, **kw)
-
- def __callback(self, option, opt_str, value, parser):
- if not self.__compatible_with_option:
- oio = parser.values.options_incompatible_options
- if opt_str not in oio:
- oio.append(opt_str)
-
- action = self.__action
- dest = self.__dest
-
- if action == "store":
- setattr(Ctx(), dest, value)
- elif action == "store_const":
- setattr(Ctx(), dest, self.__const)
- elif action == "store_true":
- setattr(Ctx(), dest, True)
- elif action == "store_false":
- setattr(Ctx(), dest, False)
- elif action == "append":
- getattr(Ctx(), dest).append(value)
- elif action == "count":
- setattr(Ctx(), dest, getattr(Ctx(), dest, 0) + 1)
- else:
- raise RuntimeError("unknown action %r" % self.__action)
-
- return 1
-
-
-class IncompatibleOptionsException(FatalError):
- pass
-
-
-# Options that are not allowed to be used with --trunk-only:
-SYMBOL_OPTIONS = [
- '--symbol-transform',
- '--symbol-hints',
- '--force-branch',
- '--force-tag',
- '--exclude',
- '--keep-trivial-imports',
- '--symbol-default',
- '--no-cross-branch-commits',
- ]
-
-class SymbolOptionsWithTrunkOnlyException(IncompatibleOptionsException):
- def __init__(self):
- IncompatibleOptionsException.__init__(
- self,
- 'The following symbol-related options cannot be used together\n'
- 'with --trunk-only:\n'
- ' %s'
- % ('\n '.join(SYMBOL_OPTIONS),)
- )
-
-
-def not_both(opt1val, opt1name, opt2val, opt2name):
- """Raise an exception if both opt1val and opt2val are set."""
- if opt1val and opt2val:
- raise IncompatibleOptionsException(
- "cannot pass both '%s' and '%s'." % (opt1name, opt2name,)
- )
-
-
-class RunOptions(object):
- """A place to store meta-options that are used to start the conversion."""
-
- def __init__(self, progname, cmd_args, pass_manager):
- """Process the command-line options, storing run options to SELF.
-
- PROGNAME is the name of the program, used in the usage string.
- CMD_ARGS is the list of command-line arguments passed to the
- program. PASS_MANAGER is an instance of PassManager, needed to
- help process the -p and --help-passes options."""
-
- self.progname = progname
- self.cmd_args = cmd_args
- self.pass_manager = pass_manager
- self.start_pass = 1
- self.end_pass = self.pass_manager.num_passes
- self.profiling = False
-
- self.projects = []
-
- # A list of one list of SymbolStrategyRules for each project:
- self.project_symbol_strategy_rules = []
-
- parser = self.parser = optparse.OptionParser(
- usage=usage,
- description=self.get_description(),
- add_help_option=False,
- )
- # A place to record any options used that are incompatible with
- # --options:
- parser.set_default('options_incompatible_options', [])
-
- # Populate the options parser with the options, one group at a
- # time:
- parser.add_option_group(self._get_options_file_options_group())
- parser.add_option_group(self._get_output_options_group())
- parser.add_option_group(self._get_conversion_options_group())
- parser.add_option_group(self._get_symbol_handling_options_group())
- parser.add_option_group(self._get_subversion_properties_options_group())
- parser.add_option_group(self._get_extraction_options_group())
- parser.add_option_group(self._get_environment_options_group())
- parser.add_option_group(self._get_partial_conversion_options_group())
- parser.add_option_group(self._get_information_options_group())
-
- (self.options, self.args) = parser.parse_args(args=self.cmd_args)
-
- # Now the log level has been set; log the time when the run started:
- Log().verbose(
- time.strftime(
- 'Conversion start time: %Y-%m-%d %I:%M:%S %Z',
- time.localtime(Log().start_time)
- )
- )
-
- if self.options.options_file_found:
- # Check that no options that are incompatible with --options
- # were used:
- self.verify_option_compatibility()
- else:
- # --options was not specified. So do the main initialization
- # based on other command-line options:
- self.process_options()
-
- # Check for problems with the options:
- self.check_options()
-
- def get_description(self):
- return description
-
- def _get_options_file_options_group(self):
- group = OptionGroup(
- self.parser, 'Configuration via options file'
- )
- self.parser.set_default('options_file_found', False)
- group.add_option(ManOption(
- '--options', type='string',
- action='callback', callback=self.callback_options,
- help=(
- 'read the conversion options from PATH. This '
- 'method allows more flexibility than using '
- 'command-line options. See documentation for info'
- ),
- man_help=(
- 'Read the conversion options from \\fIpath\\fR instead of from '
- 'the command line. This option allows far more conversion '
- 'flexibility than can be achieved using the command-line alone. '
- 'See the documentation for more information. Only the following '
- 'command-line options are allowed in combination with '
- '\\fB--options\\fR: \\fB-h\\fR/\\fB--help\\fR, '
- '\\fB--help-passes\\fR, \\fB--version\\fR, '
- '\\fB-v\\fR/\\fB--verbose\\fR, \\fB-q\\fR/\\fB--quiet\\fR, '
- '\\fB-p\\fR/\\fB--pass\\fR/\\fB--passes\\fR, \\fB--dry-run\\fR, '
- '\\fB--profile\\fR, \\fB--sort\\fR, \\fB--trunk-only\\fR, '
- '\\fB--encoding\\fR, and \\fB--fallback-encoding\\fR. '
- 'Options are processed in the order specified on the command '
- 'line.'
- ),
- metavar='PATH',
- ))
- return group
-
- def _get_output_options_group(self):
- group = OptionGroup(self.parser, 'Output options')
- return group
-
- def _get_conversion_options_group(self):
- group = OptionGroup(self.parser, 'Conversion options')
- group.add_option(ContextOption(
- '--trunk-only',
- action='store_true',
- compatible_with_option=True,
- help='convert only trunk commits, not tags nor branches',
- man_help=(
- 'Convert only trunk commits, not tags nor branches.'
- ),
- ))
- group.add_option(ManOption(
- '--encoding', type='string',
- action='callback', callback=self.callback_encoding,
- help=(
- 'encoding for paths and log messages in CVS repos. '
- 'If option is specified multiple times, encoders '
- 'are tried in order until one succeeds. See '
- 'http://docs.python.org/lib/standard-encodings.html '
- 'for a list of standard Python encodings.'
- ),
- man_help=(
- 'Use \\fIencoding\\fR as the encoding for filenames, log '
- 'messages, and author names in the CVS repos. This option '
- 'may be specified multiple times, in which case the encodings '
- 'are tried in order until one succeeds. Default: ascii. See '
- 'http://docs.python.org/lib/standard-encodings.html for a list '
- 'of other standard encodings.'
- ),
- metavar='ENC',
- ))
- group.add_option(ManOption(
- '--fallback-encoding', type='string',
- action='callback', callback=self.callback_fallback_encoding,
- help='If all --encodings fail, use lossy encoding with ENC',
- man_help=(
- 'If none of the encodings specified with \\fB--encoding\\fR '
- 'succeed in decoding an author name or log message, then fall '
- 'back to using \\fIencoding\\fR in lossy \'replace\' mode. '
- 'Use of this option may cause information to be lost, but at '
- 'least it allows the conversion to run to completion. This '
- 'option only affects the encoding of log messages and author '
- 'names; there is no fallback encoding for filenames. (By '
- 'using an \\fB--options\\fR file, it is possible to specify '
- 'a fallback encoding for filenames.) Default: disabled.'
- ),
- metavar='ENC',
- ))
- group.add_option(ContextOption(
- '--retain-conflicting-attic-files',
- action='store_true',
- help=(
- 'if a file appears both in and out of '
- 'the CVS Attic, then leave the attic version in a '
- 'SVN directory called "Attic"'
- ),
- man_help=(
- 'If a file appears both inside an outside of the CVS attic, '
- 'retain the attic version in an SVN subdirectory called '
- '\'Attic\'. (Normally this situation is treated as a fatal '
- 'error.)'
- ),
- ))
-
- return group
-
- def _get_symbol_handling_options_group(self):
- group = OptionGroup(self.parser, 'Symbol handling')
- self.parser.set_default('symbol_transforms', [])
- group.add_option(IncompatibleOption(
- '--symbol-transform', type='string',
- action='callback', callback=self.callback_symbol_transform,
- help=(
- 'transform symbol names from P to S, where P and S '
- 'use Python regexp and reference syntax '
- 'respectively. P must match the whole symbol name'
- ),
- man_help=(
- 'Transform RCS/CVS symbol names before entering them into '
- 'Subversion. \\fIpattern\\fR is a Python regexp pattern that '
- 'is matches against the entire symbol name; \\fIreplacement\\fR '
- 'is a replacement using Python\'s regexp reference syntax. '
- 'You may specify any number of these options; they will be '
- 'applied in the order given on the command line.'
- ),
- metavar='P:S',
- ))
- self.parser.set_default('symbol_strategy_rules', [])
- group.add_option(IncompatibleOption(
- '--symbol-hints', type='string',
- action='callback', callback=self.callback_symbol_hints,
- help='read symbol conversion hints from PATH',
- man_help=(
- 'Read symbol conversion hints from \\fIpath\\fR. The format of '
- '\\fIpath\\fR is the same as the format output by '
- '\\fB--write-symbol-info\\fR, namely a text file with four '
- 'whitespace-separated columns: \\fIproject-id\\fR, '
- '\\fIsymbol\\fR, \\fIconversion\\fR, and '
- '\\fIparent-lod-name\\fR. \\fIproject-id\\fR is the numerical '
- 'ID of the project to which the symbol belongs, counting from '
- '0. \\fIproject-id\\fR can be set to \'.\' if '
- 'project-specificity is not needed. \\fIsymbol-name\\fR is the '
- 'name of the symbol being specified. \\fIconversion\\fR '
- 'specifies how the symbol should be converted, and can be one '
- 'of the values \'branch\', \'tag\', or \'exclude\'. If '
- '\\fIconversion\\fR is \'.\', then this rule does not affect '
- 'how the symbol is converted. \\fIparent-lod-name\\fR is the '
- 'name of the symbol from which this symbol should sprout, or '
- '\'.trunk.\' if the symbol should sprout from trunk. If '
- '\\fIparent-lod-name\\fR is omitted or \'.\', then this rule '
- 'does not affect the preferred parent of this symbol. The file '
- 'may contain blank lines or comment lines (lines whose first '
- 'non-whitespace character is \'#\').'
- ),
- metavar='PATH',
- ))
- self.parser.set_default('symbol_default', 'heuristic')
- group.add_option(IncompatibleOption(
- '--symbol-default', type='choice',
- choices=['heuristic', 'strict', 'branch', 'tag'],
- action='store',
- help=(
- 'specify how ambiguous symbols are converted. '
- 'OPT is "heuristic" (default), "strict", "branch", '
- 'or "tag"'
- ),
- man_help=(
- 'Specify how to convert ambiguous symbols (those that appear in '
- 'the CVS archive as both branches and tags). \\fIopt\\fR must '
- 'be \'heuristic\' (decide how to treat each ambiguous symbol '
- 'based on whether it was used more often as a branch/tag in '
- 'CVS), \'strict\' (no default; every ambiguous symbol has to be '
- 'resolved manually using \\fB--force-branch\\fR, '
- '\\fB--force-tag\\fR, or \\fB--exclude\\fR), \'branch\' (treat '
- 'every ambiguous symbol as a branch), or \'tag\' (treat every '
- 'ambiguous symbol as a tag). The default is \'heuristic\'.'
- ),
- metavar='OPT',
- ))
- group.add_option(IncompatibleOption(
- '--force-branch', type='string',
- action='callback', callback=self.callback_force_branch,
- help='force symbols matching REGEXP to be branches',
- man_help=(
- 'Force symbols whose names match \\fIregexp\\fR to be branches. '
- '\\fIregexp\\fR must match the whole symbol name.'
- ),
- metavar='REGEXP',
- ))
- group.add_option(IncompatibleOption(
- '--force-tag', type='string',
- action='callback', callback=self.callback_force_tag,
- help='force symbols matching REGEXP to be tags',
- man_help=(
- 'Force symbols whose names match \\fIregexp\\fR to be tags. '
- '\\fIregexp\\fR must match the whole symbol name.'
- ),
- metavar='REGEXP',
- ))
- group.add_option(IncompatibleOption(
- '--exclude', type='string',
- action='callback', callback=self.callback_exclude,
- help='exclude branches and tags matching REGEXP',
- man_help=(
- 'Exclude branches and tags whose names match \\fIregexp\\fR '
- 'from the conversion. \\fIregexp\\fR must match the whole '
- 'symbol name.'
- ),
- metavar='REGEXP',
- ))
- self.parser.set_default('keep_trivial_imports', False)
- group.add_option(IncompatibleOption(
- '--keep-trivial-imports',
- action='store_true',
- help=(
- 'do not exclude branches that were only used for '
- 'a single import (usually these are unneeded)'
- ),
- man_help=(
- 'Do not exclude branches that were only used for a single '
- 'import. (By default such branches are excluded because they '
- 'are usually created by the inappropriate use of \\fBcvs '
- 'import\\fR.)'
- ),
- ))
-
- return group
-
- def _get_subversion_properties_options_group(self):
- group = OptionGroup(self.parser, 'Subversion properties')
- group.add_option(ContextOption(
- '--username', type='string',
- action='store',
- help='username for cvs2svn-synthesized commits',
- man_help=(
- 'Set the default username to \\fIname\\fR when cvs2svn needs '
- 'to generate a commit for which CVS does not record the '
- 'original username. This happens when a branch or tag is '
- 'created. The default is to use no author at all for such '
- 'commits.'
- ),
- metavar='NAME',
- ))
- self.parser.set_default('auto_props_files', [])
- group.add_option(IncompatibleOption(
- '--auto-props', type='string',
- action='append', dest='auto_props_files',
- help=(
- 'set file properties from the auto-props section '
- 'of a file in svn config format'
- ),
- man_help=(
- 'Specify a file in the format of Subversion\'s config file, '
- 'whose [auto-props] section can be used to set arbitrary '
- 'properties on files in the Subversion repository based on '
- 'their filenames. (The [auto-props] section header must be '
- 'present; other sections of the config file, including the '
- 'enable-auto-props setting, are ignored.) Filenames are matched '
- 'to the filename patterns case-insensitively.'
-
- ),
- metavar='FILE',
- ))
- self.parser.set_default('mime_types_files', [])
- group.add_option(IncompatibleOption(
- '--mime-types', type='string',
- action='append', dest='mime_types_files',
- help=(
- 'specify an apache-style mime.types file for setting '
- 'svn:mime-type'
- ),
- man_help=(
- 'Specify an apache-style mime.types \\fIfile\\fR for setting '
- 'svn:mime-type.'
- ),
- metavar='FILE',
- ))
- self.parser.set_default('eol_from_mime_type', False)
- group.add_option(IncompatibleOption(
- '--eol-from-mime-type',
- action='store_true',
- help='set svn:eol-style from mime type if known',
- man_help=(
- 'For files that don\'t have the kb expansion mode but have a '
- 'known mime type, set the eol-style based on the mime type. '
- 'For such files, set svn:eol-style to "native" if the mime type '
- 'begins with "text/", and leave it unset (i.e., no EOL '
- 'translation) otherwise. Files with unknown mime types are '
- 'not affected by this option. This option has no effect '
- 'unless the \\fB--mime-types\\fR option is also specified.'
- ),
- ))
- group.add_option(IncompatibleOption(
- '--default-eol', type='choice',
- choices=['binary', 'native', 'CRLF', 'LF', 'CR'],
- action='store',
- help=(
- 'default svn:eol-style for non-binary files with '
- 'undetermined mime types. STYLE is "binary" '
- '(default), "native", "CRLF", "LF", or "CR"'
- ),
- man_help=(
- 'Set svn:eol-style to \\fIstyle\\fR for files that don\'t have '
- 'the CVS \'kb\' expansion mode and whose end-of-line '
- 'translation mode hasn\'t been determined by one of the other '
- 'options. \\fIstyle\\fR must be \'binary\' (default), '
- '\'native\', \'CRLF\', \'LF\', or \'CR\'.'
- ),
- metavar='STYLE',
- ))
- self.parser.set_default('keywords_off', False)
- group.add_option(IncompatibleOption(
- '--keywords-off',
- action='store_true',
- help=(
- 'don\'t set svn:keywords on any files (by default, '
- 'cvs2svn sets svn:keywords on non-binary files to "%s")'
- % (config.SVN_KEYWORDS_VALUE,)
- ),
- man_help=(
- 'By default, cvs2svn sets svn:keywords on CVS files to "author '
- 'id date" if the mode of the RCS file in question is either kv, '
- 'kvl or unset. If you use the --keywords-off switch, cvs2svn '
- 'will not set svn:keywords for any file. While this will not '
- 'touch the keywords in the contents of your files, Subversion '
- 'will not expand them.'
- ),
- ))
- group.add_option(ContextOption(
- '--keep-cvsignore',
- action='store_true',
- help=(
- 'keep .cvsignore files (in addition to creating '
- 'the analogous svn:ignore properties)'
- ),
- man_help=(
- 'Include \\fI.cvsignore\\fR files in the output. (Normally '
- 'they are unneeded because cvs2svn sets the corresponding '
- '\\fIsvn:ignore\\fR properties.)'
- ),
- ))
- group.add_option(IncompatibleOption(
- '--cvs-revnums',
- action='callback', callback=self.callback_cvs_revnums,
- help='record CVS revision numbers as file properties',
- man_help=(
- 'Record CVS revision numbers as file properties in the '
- 'Subversion repository. (Note that unless it is removed '
- 'explicitly, the last CVS revision number will remain '
- 'associated with the file even after the file is changed '
- 'within Subversion.)'
- ),
- ))
-
- # Deprecated options:
- group.add_option(IncompatibleOption(
- '--no-default-eol',
- action='store_const', dest='default_eol', const=None,
- help=optparse.SUPPRESS_HELP,
- man_help=optparse.SUPPRESS_HELP,
- ))
- self.parser.set_default('auto_props_ignore_case', True)
- # True is the default now, so this option has no effect:
- group.add_option(IncompatibleOption(
- '--auto-props-ignore-case',
- action='store_true',
- help=optparse.SUPPRESS_HELP,
- man_help=optparse.SUPPRESS_HELP,
- ))
-
- return group
-
- def _get_extraction_options_group(self):
- group = OptionGroup(self.parser, 'Extraction options')
-
- return group
-
- def _get_environment_options_group(self):
- group = OptionGroup(self.parser, 'Environment options')
- group.add_option(ContextOption(
- '--tmpdir', type='string',
- action='store',
- help=(
- 'directory to use for temporary data files '
- '(default "cvs2svn-tmp")'
- ),
- man_help=(
- 'Set the \\fIpath\\fR to use for temporary data. Default '
- 'is a directory called \\fIcvs2svn-tmp\\fR under the current '
- 'directory.'
- ),
- metavar='PATH',
- ))
- self.parser.set_default('co_executable', config.CO_EXECUTABLE)
- group.add_option(IncompatibleOption(
- '--co', type='string',
- action='store', dest='co_executable',
- help='path to the "co" program (required if --use-rcs)',
- man_help=(
- 'Path to the \\fIco\\fR program. (\\fIco\\fR is needed if the '
- '\\fB--use-rcs\\fR option is used.)'
- ),
- metavar='PATH',
- ))
- self.parser.set_default('cvs_executable', config.CVS_EXECUTABLE)
- group.add_option(IncompatibleOption(
- '--cvs', type='string',
- action='store', dest='cvs_executable',
- help='path to the "cvs" program (required if --use-cvs)',
- man_help=(
- 'Path to the \\fIcvs\\fR program. (\\fIcvs\\fR is needed if the '
- '\\fB--use-cvs\\fR option is used.)'
- ),
- metavar='PATH',
- ))
- group.add_option(ContextOption(
- '--sort', type='string',
- action='store', dest='sort_executable',
- compatible_with_option=True,
- help='path to the GNU "sort" program',
- man_help=(
- 'Path to the GNU \\fIsort\\fR program. (cvs2svn requires GNU '
- 'sort.)'
- ),
- metavar='PATH',
- ))
-
- return group
-
- def _get_partial_conversion_options_group(self):
- group = OptionGroup(self.parser, 'Partial conversions')
- group.add_option(ManOption(
- '--pass', type='string',
- action='callback', callback=self.callback_passes,
- help='execute only specified PASS of conversion',
- man_help=(
- 'Execute only pass \\fIpass\\fR of the conversion. '
- '\\fIpass\\fR can be specified by name or by number (see '
- '\\fB--help-passes\\fR).'
- ),
- metavar='PASS',
- ))
- group.add_option(ManOption(
- '--passes', '-p', type='string',
- action='callback', callback=self.callback_passes,
- help=(
- 'execute passes START through END, inclusive (PASS, '
- 'START, and END can be pass names or numbers)'
- ),
- man_help=(
- 'Execute passes \\fIstart\\fR through \\fIend\\fR of the '
- 'conversion (inclusive). \\fIstart\\fR and \\fIend\\fR can be '
- 'specified by name or by number (see \\fB--help-passes\\fR). '
- 'If \\fIstart\\fR or \\fIend\\fR is missing, it defaults to '
- 'the first or last pass, respectively. For this to work the '
- 'earlier passes must have been completed before on the '
- 'same CVS repository, and the generated data files must be '
- 'in the temporary directory (see \\fB--tmpdir\\fR).'
- ),
- metavar='[START]:[END]',
- ))
-
- return group
-
- def _get_information_options_group(self):
- group = OptionGroup(self.parser, 'Information options')
- group.add_option(ManOption(
- '--version',
- action='callback', callback=self.callback_version,
- help='print the version number',
- man_help='Print the version number.',
- ))
- group.add_option(ManOption(
- '--help', '-h',
- action="help",
- help='print this usage message and exit with success',
- man_help='Print the usage message and exit with success.',
- ))
- group.add_option(ManOption(
- '--help-passes',
- action='callback', callback=self.callback_help_passes,
- help='list the available passes and their numbers',
- man_help=(
- 'Print the numbers and names of the conversion passes and '
- 'exit with success.'
- ),
- ))
- group.add_option(ManOption(
- '--man',
- action='callback', callback=self.callback_manpage,
- help='write the manpage for this program to standard output',
- man_help=(
- 'Output the unix-style manpage for this program to standard '
- 'output.'
- ),
- ))
- group.add_option(ManOption(
- '--verbose', '-v',
- action='callback', callback=self.callback_verbose,
- help='verbose (may be specified twice for debug output)',
- man_help=(
- 'Print more information while running. This option may be '
- 'specified twice to output voluminous debugging information.'
- ),
- ))
- group.add_option(ManOption(
- '--quiet', '-q',
- action='callback', callback=self.callback_quiet,
- help='quiet (may be specified twice for very quiet)',
- man_help=(
- 'Print less information while running. This option may be '
- 'specified twice to suppress all non-error output.'
- ),
- ))
- group.add_option(ContextOption(
- '--write-symbol-info', type='string',
- action='store', dest='symbol_info_filename',
- help='write information and statistics about CVS symbols to PATH.',
- man_help=(
- 'Write to \\fIpath\\fR symbol statistics and information about '
- 'how symbols were converted during CollateSymbolsPass.'
- ),
- metavar='PATH',
- ))
- group.add_option(ContextOption(
- '--skip-cleanup',
- action='store_true',
- help='prevent the deletion of intermediate files',
- man_help='Prevent the deletion of temporary files.',
- ))
- group.add_option(ManOption(
- '--profile',
- action='callback', callback=self.callback_profile,
- help='profile with \'hotshot\' (into file cvs2svn.hotshot)',
- man_help=(
- 'Profile with \'hotshot\' (into file \\fIcvs2svn.hotshot\\fR).'
- ),
- ))
-
- return group
-
- def callback_options(self, option, opt_str, value, parser):
- parser.values.options_file_found = True
- self.process_options_file(value)
-
- def callback_encoding(self, option, opt_str, value, parser):
- ctx = Ctx()
-
- try:
- ctx.cvs_author_decoder.add_encoding(value)
- ctx.cvs_log_decoder.add_encoding(value)
- ctx.cvs_filename_decoder.add_encoding(value)
- except LookupError, e:
- raise FatalError(str(e))
-
- def callback_fallback_encoding(self, option, opt_str, value, parser):
- ctx = Ctx()
-
- try:
- ctx.cvs_author_decoder.set_fallback_encoding(value)
- ctx.cvs_log_decoder.set_fallback_encoding(value)
- # Don't use fallback_encoding for filenames.
- except LookupError, e:
- raise FatalError(str(e))
-
- def callback_help_passes(self, option, opt_str, value, parser):
- self.pass_manager.help_passes()
- sys.exit(0)
-
- def callback_manpage(self, option, opt_str, value, parser):
- raise NotImplementedError()
-
- def callback_version(self, option, opt_str, value, parser):
- sys.stdout.write(
- '%s version %s\n' % (self.progname, VERSION)
- )
- sys.exit(0)
-
- def callback_verbose(self, option, opt_str, value, parser):
- Log().increase_verbosity()
-
- def callback_quiet(self, option, opt_str, value, parser):
- Log().decrease_verbosity()
-
- def callback_passes(self, option, opt_str, value, parser):
- if value.find(':') >= 0:
- start_pass, end_pass = value.split(':')
- self.start_pass = self.pass_manager.get_pass_number(start_pass, 1)
- self.end_pass = self.pass_manager.get_pass_number(
- end_pass, self.pass_manager.num_passes
- )
- else:
- self.end_pass = \
- self.start_pass = \
- self.pass_manager.get_pass_number(value)
-
- def callback_profile(self, option, opt_str, value, parser):
- self.profiling = True
-
- def callback_symbol_hints(self, option, opt_str, value, parser):
- parser.values.symbol_strategy_rules.append(SymbolHintsFileRule(value))
-
- def callback_force_branch(self, option, opt_str, value, parser):
- parser.values.symbol_strategy_rules.append(
- ForceBranchRegexpStrategyRule(value)
- )
-
- def callback_force_tag(self, option, opt_str, value, parser):
- parser.values.symbol_strategy_rules.append(
- ForceTagRegexpStrategyRule(value)
- )
-
- def callback_exclude(self, option, opt_str, value, parser):
- parser.values.symbol_strategy_rules.append(
- ExcludeRegexpStrategyRule(value)
- )
-
- def callback_cvs_revnums(self, option, opt_str, value, parser):
- Ctx().svn_property_setters.append(CVSRevisionNumberSetter())
-
- def callback_symbol_transform(self, option, opt_str, value, parser):
- [pattern, replacement] = value.split(":")
- try:
- parser.values.symbol_transforms.append(
- RegexpSymbolTransform(pattern, replacement)
- )
- except re.error:
- raise FatalError("'%s' is not a valid regexp." % (pattern,))
-
- def process_symbol_strategy_options(self):
- """Process symbol strategy-related options."""
-
- ctx = Ctx()
- options = self.options
-
- # Add the standard symbol name cleanup rules:
- self.options.symbol_transforms.extend([
- ReplaceSubstringsSymbolTransform('\\','/'),
- # Remove leading, trailing, and repeated slashes:
- NormalizePathsSymbolTransform(),
- ])
-
- if ctx.trunk_only:
- if options.symbol_strategy_rules or options.keep_trivial_imports:
- raise SymbolOptionsWithTrunkOnlyException()
-
- else:
- if not options.keep_trivial_imports:
- options.symbol_strategy_rules.append(ExcludeTrivialImportBranchRule())
-
- options.symbol_strategy_rules.append(UnambiguousUsageRule())
- if options.symbol_default == 'strict':
- pass
- elif options.symbol_default == 'branch':
- options.symbol_strategy_rules.append(AllBranchRule())
- elif options.symbol_default == 'tag':
- options.symbol_strategy_rules.append(AllTagRule())
- elif options.symbol_default == 'heuristic':
- options.symbol_strategy_rules.append(BranchIfCommitsRule())
- options.symbol_strategy_rules.append(HeuristicStrategyRule())
- else:
- assert False
-
- # Now add a rule whose job it is to pick the preferred parents of
- # branches and tags:
- options.symbol_strategy_rules.append(HeuristicPreferredParentRule())
-
- def process_property_setter_options(self):
- """Process the options that set SVN properties."""
-
- ctx = Ctx()
- options = self.options
-
- for value in options.auto_props_files:
- ctx.svn_property_setters.append(
- AutoPropsPropertySetter(value, options.auto_props_ignore_case)
- )
-
- for value in options.mime_types_files:
- ctx.svn_property_setters.append(MimeMapper(value))
-
- ctx.svn_property_setters.append(CVSBinaryFileEOLStyleSetter())
-
- ctx.svn_property_setters.append(CVSBinaryFileDefaultMimeTypeSetter())
-
- if options.eol_from_mime_type:
- ctx.svn_property_setters.append(EOLStyleFromMimeTypeSetter())
-
- ctx.svn_property_setters.append(
- DefaultEOLStyleSetter(options.default_eol)
- )
-
- ctx.svn_property_setters.append(SVNBinaryFileKeywordsPropertySetter())
-
- if not options.keywords_off:
- ctx.svn_property_setters.append(
- KeywordsPropertySetter(config.SVN_KEYWORDS_VALUE))
-
- ctx.svn_property_setters.append(ExecutablePropertySetter())
-
- def process_options(self):
- """Do the main configuration based on command-line options.
-
- This method is only called if the --options option was not
- specified."""
-
- raise NotImplementedError()
-
- def check_options(self):
- """Check the the run options are OK.
-
- This should only be called after all options have been processed."""
-
- # Convenience var, so we don't have to keep instantiating this Borg.
- ctx = Ctx()
-
- if not self.start_pass <= self.end_pass:
- raise InvalidPassError(
- 'Ending pass must not come before starting pass.')
-
- if not ctx.dry_run and ctx.output_option is None:
- raise FatalError('No output option specified.')
-
- if ctx.output_option is not None:
- ctx.output_option.check()
-
- if not self.projects:
- raise FatalError('No project specified.')
-
- def verify_option_compatibility(self):
- """Verify that no options incompatible with --options were used.
-
- The --options option was specified. Verify that no incompatible
- options or arguments were specified."""
-
- if self.options.options_incompatible_options or self.args:
- if self.options.options_incompatible_options:
- oio = self.options.options_incompatible_options
- Log().error(
- '%s: The following options cannot be used in combination with '
- 'the --options\n'
- 'option:\n'
- ' %s\n'
- % (error_prefix, '\n '.join(oio))
- )
- if self.args:
- Log().error(
- '%s: No cvs-repos-path arguments are allowed with the --options '
- 'option.\n'
- % (error_prefix,)
- )
- sys.exit(1)
-
- def process_options_file(self, options_filename):
- """Read options from the file named OPTIONS_FILENAME.
-
- Store the run options to SELF."""
-
- g = {
- 'ctx' : Ctx(),
- 'run_options' : self,
- }
- execfile(options_filename, g)
-
- def usage(self):
- self.parser.print_help()
-
-
diff --git a/cvs2svn_lib/serializer.py b/cvs2svn_lib/serializer.py
deleted file mode 100644
index 24bd81c..0000000
--- a/cvs2svn_lib/serializer.py
+++ /dev/null
@@ -1,146 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""Picklers and unpicklers that are primed with known objects."""
-
-
-import cStringIO
-import marshal
-import cPickle
-import zlib
-
-
-class Serializer:
- """An object able to serialize/deserialize some class of objects."""
-
- def dumpf(self, f, object):
- """Serialize OBJECT to file-like object F."""
-
- raise NotImplementedError()
-
- def dumps(self, object):
- """Return a string containing OBJECT in serialized form."""
-
- raise NotImplementedError()
-
- def loadf(self, f):
- """Return the next object deserialized from file-like object F."""
-
- raise NotImplementedError()
-
- def loads(self, s):
- """Return the object deserialized from string S."""
-
- raise NotImplementedError()
-
-
-class MarshalSerializer(Serializer):
- """This class uses the marshal module to serialize/deserialize.
-
- This means that it shares the limitations of the marshal module,
- namely only being able to serialize a few simple python data types
- without reference loops."""
-
- def dumpf(self, f, object):
- marshal.dump(object, f)
-
- def dumps(self, object):
- return marshal.dumps(object)
-
- def loadf(self, f):
- return marshal.load(f)
-
- def loads(self, s):
- return marshal.loads(s)
-
-
-class PrimedPickleSerializer(Serializer):
- """This class acts as a pickler/unpickler with a pre-initialized memo.
-
- The picklers and unpicklers are 'pre-trained' to recognize the
- objects that are in the primer. If objects are recognized
- from PRIMER, then only their persistent IDs need to be pickled
- instead of the whole object. (Note that the memos needed for
- pickling and unpickling are different.)
-
- A new pickler/unpickler is created for each use, each time with the
- memo initialized appropriately for pickling or unpickling."""
-
- def __init__(self, primer):
- """Prepare to make picklers/unpicklers with the specified primer.
-
- The Pickler and Unpickler are 'primed' by pre-pickling PRIMER,
- which can be an arbitrary object (e.g., a list of objects that are
- expected to occur frequently in the objects to be serialized)."""
-
- f = cStringIO.StringIO()
- pickler = cPickle.Pickler(f, -1)
- pickler.dump(primer)
- self.pickler_memo = pickler.memo
-
- unpickler = cPickle.Unpickler(cStringIO.StringIO(f.getvalue()))
- unpickler.load()
- self.unpickler_memo = unpickler.memo
-
- def dumpf(self, f, object):
- """Serialize OBJECT to file-like object F."""
-
- pickler = cPickle.Pickler(f, -1)
- pickler.memo = self.pickler_memo.copy()
- pickler.dump(object)
-
- def dumps(self, object):
- """Return a string containing OBJECT in serialized form."""
-
- f = cStringIO.StringIO()
- self.dumpf(f, object)
- return f.getvalue()
-
- def loadf(self, f):
- """Return the next object deserialized from file-like object F."""
-
- unpickler = cPickle.Unpickler(f)
- unpickler.memo = self.unpickler_memo.copy()
- return unpickler.load()
-
- def loads(self, s):
- """Return the object deserialized from string S."""
-
- return self.loadf(cStringIO.StringIO(s))
-
-
-class CompressingSerializer(Serializer):
- """This class wraps other Serializers to compress their serialized data."""
-
- def __init__(self, wrapee):
- """Constructor. WRAPEE is the Serializer whose bitstream ought to be
- compressed."""
-
- self.wrapee = wrapee
-
- def dumpf(self, f, object):
- marshal.dump(zlib.compress(self.wrapee.dumps(object), 9), f)
-
- def dumps(self, object):
- return marshal.dumps(zlib.compress(self.wrapee.dumps(object), 9))
-
- def loadf(self, f):
- return self.wrapee.loads(zlib.decompress(marshal.load(f)))
-
- def loads(self, s):
- return self.wrapee.loads(zlib.decompress(marshal.loads(s)))
-
-
diff --git a/cvs2svn_lib/stats_keeper.py b/cvs2svn_lib/stats_keeper.py
deleted file mode 100644
index 1a82540..0000000
--- a/cvs2svn_lib/stats_keeper.py
+++ /dev/null
@@ -1,189 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains the StatsKeeper class.
-
-A StatsKeeper can pickle itself to a STATISTICS_FILE. This module
-also includes a function to read a StatsKeeper from a STATISTICS_FILE."""
-
-
-import time
-import cPickle
-from cStringIO import StringIO
-
-from cvs2svn_lib.cvs_item import CVSRevision
-from cvs2svn_lib.cvs_item import CVSBranch
-from cvs2svn_lib.cvs_item import CVSTag
-
-
-class StatsKeeper:
- def __init__(self):
- self._svn_rev_count = None
- self._first_rev_date = 1L<<32
- self._last_rev_date = 0
- self._pass_timings = { }
- self._stats_reflect_exclude = False
- self.reset_cvs_rev_info()
-
- def log_duration_for_pass(self, duration, pass_num, pass_name):
- self._pass_timings[pass_num] = (pass_name, duration,)
-
- def set_stats_reflect_exclude(self, value):
- self._stats_reflect_exclude = value
-
- def reset_cvs_rev_info(self):
- self._repos_file_count = 0
- self._repos_size = 0
- self._cvs_revs_count = 0
- self._cvs_branches_count = 0
- self._cvs_tags_count = 0
-
- # A set of tag_ids seen:
- self._tag_ids = set()
-
- # A set of branch_ids seen:
- self._branch_ids = set()
-
- def record_cvs_file(self, cvs_file):
- self._repos_file_count += 1
- self._repos_size += cvs_file.file_size
-
- def _record_cvs_rev(self, cvs_rev):
- self._cvs_revs_count += 1
-
- if cvs_rev.timestamp < self._first_rev_date:
- self._first_rev_date = cvs_rev.timestamp
-
- if cvs_rev.timestamp > self._last_rev_date:
- self._last_rev_date = cvs_rev.timestamp
-
- def _record_cvs_branch(self, cvs_branch):
- self._cvs_branches_count += 1
- self._branch_ids.add(cvs_branch.symbol.id)
-
- def _record_cvs_tag(self, cvs_tag):
- self._cvs_tags_count += 1
- self._tag_ids.add(cvs_tag.symbol.id)
-
- def record_cvs_item(self, cvs_item):
- if isinstance(cvs_item, CVSRevision):
- self._record_cvs_rev(cvs_item)
- elif isinstance(cvs_item, CVSBranch):
- self._record_cvs_branch(cvs_item)
- elif isinstance(cvs_item, CVSTag):
- self._record_cvs_tag(cvs_item)
- else:
- raise RuntimeError('Unknown CVSItem type')
-
- def set_svn_rev_count(self, count):
- self._svn_rev_count = count
-
- def svn_rev_count(self):
- return self._svn_rev_count
-
- def __getstate__(self):
- state = self.__dict__.copy()
- # This can get kinda large, so we don't store it:
- return state
-
- def archive(self, filename):
- f = open(filename, 'wb')
- cPickle.dump(self, f)
- f.close()
-
- def __str__(self):
- f = StringIO()
- f.write('\n')
- f.write('cvs2svn Statistics:\n')
- f.write('------------------\n')
- f.write('Total CVS Files: %10i\n' % (self._repos_file_count,))
- f.write('Total CVS Revisions: %10i\n' % (self._cvs_revs_count,))
- f.write('Total CVS Branches: %10i\n' % (self._cvs_branches_count,))
- f.write('Total CVS Tags: %10i\n' % (self._cvs_tags_count,))
- f.write('Total Unique Tags: %10i\n' % (len(self._tag_ids),))
- f.write('Total Unique Branches: %10i\n' % (len(self._branch_ids),))
- f.write('CVS Repos Size in KB: %10i\n' % ((self._repos_size / 1024),))
-
- if self._svn_rev_count is not None:
- f.write('Total SVN Commits: %10i\n' % self._svn_rev_count)
-
- f.write(
- 'First Revision Date: %s\n' % (time.ctime(self._first_rev_date),)
- )
- f.write(
- 'Last Revision Date: %s\n' % (time.ctime(self._last_rev_date),)
- )
- f.write('------------------')
-
- if not self._stats_reflect_exclude:
- f.write(
- '\n'
- '(These are unaltered CVS repository stats and do not\n'
- ' reflect tags or branches excluded via --exclude)\n'
- )
-
- return f.getvalue()
-
- @staticmethod
- def _get_timing_format(value):
- # Output times with up to 3 decimal places:
- decimals = max(0, 4 - len('%d' % int(value)))
- length = len(('%%.%df' % decimals) % value)
- return '%%%d.%df' % (length, decimals,)
-
- def single_pass_timing(self, pass_num):
- (pass_name, duration,) = self._pass_timings[pass_num]
- format = self._get_timing_format(duration)
- time_string = format % (duration,)
- return (
- 'Time for pass%d (%s): %s seconds.'
- % (pass_num, pass_name, time_string,)
- )
-
- def timings(self):
- passes = self._pass_timings.keys()
- passes.sort()
- f = StringIO()
- f.write('Timings (seconds):\n')
- f.write('------------------\n')
-
- total = 0.0
- for pass_num in passes:
- (pass_name, duration,) = self._pass_timings[pass_num]
- total += duration
-
- format = self._get_timing_format(total)
-
- for pass_num in passes:
- (pass_name, duration,) = self._pass_timings[pass_num]
- f.write(
- (format + ' pass%-2d %s\n') % (duration, pass_num, pass_name,)
- )
-
- f.write((format + ' total') % total)
- return f.getvalue()
-
-
-def read_stats_keeper(filename):
- """Factory function: Return a _StatsKeeper instance.
-
- Read the instance from FILENAME as written by StatsKeeper.archive()."""
-
- f = open(filename, 'rb')
- retval = cPickle.load(f)
- f.close()
- return retval
-
diff --git a/cvs2svn_lib/stdout_delegate.py b/cvs2svn_lib/stdout_delegate.py
deleted file mode 100644
index 2b4e228..0000000
--- a/cvs2svn_lib/stdout_delegate.py
+++ /dev/null
@@ -1,107 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains database facilities used by cvs2svn."""
-
-
-from cvs2svn_lib.log import Log
-from cvs2svn_lib.svn_repository_delegate import SVNRepositoryDelegate
-
-
-class StdoutDelegate(SVNRepositoryDelegate):
- """Makes no changes to the disk, but writes out information to
- STDOUT about what is happening in the SVN output. Of course, our
- print statements will state that we're doing something, when in
- reality, we aren't doing anything other than printing out that we're
- doing something. Kind of zen, really."""
-
- def __init__(self, total_revs):
- self.total_revs = total_revs
-
- def start_commit(self, revnum, revprops):
- """Prints out the Subversion revision number of the commit that is
- being started."""
-
- Log().verbose("=" * 60)
- Log().normal("Starting Subversion r%d / %d" % (revnum, self.total_revs))
-
- def end_commit(self):
- pass
-
- def initialize_project(self, project):
- Log().verbose(" Initializing project %s" % (project,))
-
- def initialize_lod(self, lod):
- Log().verbose(" Initializing %s" % (lod,))
-
- def mkdir(self, lod, cvs_directory):
- Log().verbose(
- " New Directory %s" % (lod.get_path(cvs_directory.cvs_path),)
- )
-
- def add_path(self, s_item):
- """Print a line stating what path we are 'adding'."""
-
- Log().verbose(" Adding %s" % (s_item.cvs_rev.get_svn_path(),))
-
- def change_path(self, s_item):
- """Print a line stating what path we are 'changing'."""
-
- Log().verbose(" Changing %s" % (s_item.cvs_rev.get_svn_path(),))
-
- def delete_lod(self, lod):
- """Print a line stating that we are 'deleting' LOD."""
-
- Log().verbose(" Deleting %s" % (lod.get_path(),))
-
- def delete_path(self, lod, cvs_path):
- """Print a line stating that we are 'deleting' PATH."""
-
- Log().verbose(" Deleting %s" % (lod.get_path(cvs_path.cvs_path),))
-
- def _show_copy(self, src_path, dest_path, src_revnum):
- """Print a line stating that we are 'copying' revision SRC_REVNUM
- of SRC_PATH to DEST_PATH."""
-
- Log().verbose(
- " Copying revision %d of %s\n"
- " to %s\n"
- % (src_revnum, src_path, dest_path,)
- )
-
- def copy_lod(self, src_lod, dest_lod, src_revnum):
- """Print a line stating that we are 'copying' revision SRC_REVNUM
- of SRC_PATH to DEST_PATH."""
-
- self._show_copy(src_lod.get_path(), dest_lod.get_path(), src_revnum)
-
- def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum):
- """Print a line stating that we are 'copying' revision SRC_REVNUM
- of CVS_PATH from SRC_LOD to DEST_LOD."""
-
- self._show_copy(
- src_lod.get_path(cvs_path.cvs_path),
- dest_lod.get_path(cvs_path.cvs_path),
- src_revnum,
- )
-
- def finish(self):
- """State that we are done creating our repository."""
-
- Log().verbose("Finished creating Subversion repository.")
- Log().quiet("Done.")
-
-
diff --git a/cvs2svn_lib/svn_commit.py b/cvs2svn_lib/svn_commit.py
deleted file mode 100644
index 25dc38e..0000000
--- a/cvs2svn_lib/svn_commit.py
+++ /dev/null
@@ -1,381 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains the SVNCommit classes.
-
-There are five types of SVNCommits:
-
- SVNInitialProjectCommit -- Initializes a project (creates its trunk,
- branches, and tags directories).
-
- SVNPrimaryCommit -- Commits one or more CVSRevisions on one or more
- lines of development.
-
- SVNBranchCommit -- Creates or fills a branch; that is, copies files
- from a source line of development to a target branch.
-
- SVNTagCommit -- Creates or fills a tag; that is, copies files from a
- source line of development to a target tag.
-
- SVNPostCommit -- Updates trunk to reflect changes on a non-trunk
- default branch.
-
-"""
-
-
-import textwrap
-
-from cvs2svn_lib.common import InternalError
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.symbol import Branch
-from cvs2svn_lib.symbol import Tag
-
-
-class SVNCommit:
- """This represents one commit to the Subversion Repository."""
-
- # textwrap.TextWrapper instance to be used for wrapping log messages:
- text_wrapper = textwrap.TextWrapper(width=76)
-
- def __init__(self, date, revnum):
- """Instantiate an SVNCommit.
-
- REVNUM is the SVN revision number of this commit."""
-
- # The date of the commit, as an integer. While the SVNCommit is
- # being built up, this contains the latest date seen so far. This
- # member is set externally.
- self.date = date
-
- # The SVN revision number of this commit, as an integer.
- self.revnum = revnum
-
- def __getstate__(self):
- return (self.date, self.revnum,)
-
- def __setstate__(self, state):
- (self.date, self.revnum,) = state
-
- def get_cvs_items(self):
- """Return a list containing the CVSItems in this commit."""
-
- raise NotImplementedError()
-
- def get_author(self):
- """Return the author or this commit, or None if none is to be used.
-
- The return value is exactly as the author appeared in the RCS
- file, with undefined character encoding."""
-
- raise NotImplementedError()
-
- def get_log_msg(self):
- """Return a log message for this commit.
-
- The return value is exactly as the log message appeared in the RCS
- file, with undefined character encoding."""
-
- raise NotImplementedError()
-
- def get_warning_summary(self):
- """Return a summary of this commit that can be used in warnings."""
-
- return '(subversion rev %s)' % (self.revnum,)
-
- def get_description(self):
- """Return a partial description of this SVNCommit, for logging."""
-
- raise NotImplementedError()
-
- def output(self, output_option):
- """Cause this commit to be output to OUTPUT_OPTION.
-
- This method is used for double-dispatch. Derived classes should
- call the OutputOption.process_*_commit() method appropriate for
- the type of SVNCommit."""
-
- raise NotImplementedError()
-
- def __str__(self):
- """ Print a human-readable description of this SVNCommit.
-
- This description is not intended to be machine-parseable."""
-
- ret = "SVNCommit #: " + str(self.revnum) + "\n"
- ret += " debug description: " + self.get_description() + "\n"
- return ret
-
-
-class SVNInitialProjectCommit(SVNCommit):
- def __init__(self, date, projects, revnum):
- SVNCommit.__init__(self, date, revnum)
- self.projects = list(projects)
-
- def __getstate__(self):
- return (
- SVNCommit.__getstate__(self),
- [project.id for project in self.projects],
- )
-
- def __setstate__(self, state):
- (svn_commit_state, project_ids,) = state
- SVNCommit.__setstate__(self, svn_commit_state)
- self.projects = [
- Ctx()._projects[project_id] for project_id in project_ids
- ]
-
- def get_cvs_items(self):
- return []
-
- def get_author(self):
- return Ctx().username
-
- def get_log_msg(self):
- return self.text_wrapper.fill(
- Ctx().initial_project_commit_message % {}
- )
-
- def get_description(self):
- return 'Project initialization'
-
- def output(self, output_option):
- output_option.process_initial_project_commit(self)
-
-
-class SVNRevisionCommit(SVNCommit):
- """A SVNCommit that includes actual CVS revisions."""
-
- def __init__(self, cvs_revs, date, revnum):
- SVNCommit.__init__(self, date, revnum)
-
- self.cvs_revs = list(cvs_revs)
-
- # This value is set lazily by _get_metadata():
- self._metadata = None
-
- def __getstate__(self):
- """Return the part of the state represented by this mixin."""
-
- return (
- SVNCommit.__getstate__(self),
- [cvs_rev.id for cvs_rev in self.cvs_revs],
- )
-
- def __setstate__(self, state):
- """Restore the part of the state represented by this mixin."""
-
- (svn_commit_state, cvs_rev_ids) = state
- SVNCommit.__setstate__(self, svn_commit_state)
-
- self.cvs_revs = [
- cvs_rev
- for (id, cvs_rev) in Ctx()._cvs_items_db.get_many(cvs_rev_ids)
- ]
- self._metadata = None
-
- def get_cvs_items(self):
- return self.cvs_revs
-
- def _get_metadata(self):
- """Return the Metadata instance for this commit."""
-
- if self._metadata is None:
- # Set self._metadata for this commit from that of the first cvs
- # revision.
- if not self.cvs_revs:
- raise InternalError('SVNPrimaryCommit contains no CVS revisions')
-
- metadata_id = self.cvs_revs[0].metadata_id
- self._metadata = Ctx()._metadata_db[metadata_id]
-
- return self._metadata
-
- def get_author(self):
- return self._get_metadata().author
-
- def get_warning_summary(self):
- retval = []
- retval.append(SVNCommit.get_warning_summary(self) + ' Related files:')
- for cvs_rev in self.cvs_revs:
- retval.append(' ' + cvs_rev.cvs_file.filename)
- return '\n'.join(retval)
-
- def __str__(self):
- """Return the revision part of a description of this SVNCommit.
-
- Derived classes should append the output of this method to the
- output of SVNCommit.__str__()."""
-
- ret = []
- ret.append(SVNCommit.__str__(self))
- ret.append(' cvs_revs:\n')
- for cvs_rev in self.cvs_revs:
- ret.append(' %x\n' % (cvs_rev.id,))
- return ''.join(ret)
-
-
-class SVNPrimaryCommit(SVNRevisionCommit):
- def __init__(self, cvs_revs, date, revnum):
- SVNRevisionCommit.__init__(self, cvs_revs, date, revnum)
-
- def get_log_msg(self):
- """Return the actual log message for this commit."""
-
- return self._get_metadata().log_msg
-
- def get_description(self):
- return 'commit'
-
- def output(self, output_option):
- output_option.process_primary_commit(self)
-
-
-class SVNPostCommit(SVNRevisionCommit):
- def __init__(self, motivating_revnum, cvs_revs, date, revnum):
- SVNRevisionCommit.__init__(self, cvs_revs, date, revnum)
-
- # The subversion revision number of the *primary* commit where the
- # default branch changes actually happened. (NOTE: Secondary
- # commits that fill branches and tags also have a motivating
- # commit, but we do not record it because it is (currently) not
- # needed for anything.) motivating_revnum is used when generating
- # the log message for the commit that synchronizes the default
- # branch with trunk.
- #
- # It is possible for multiple synchronization commits to refer to
- # the same motivating commit revision number, and it is possible
- # for a single synchronization commit to contain CVSRevisions on
- # multiple different default branches.
- self.motivating_revnum = motivating_revnum
-
- def __getstate__(self):
- return (
- SVNRevisionCommit.__getstate__(self),
- self.motivating_revnum,
- )
-
- def __setstate__(self, state):
- (rev_state, self.motivating_revnum,) = state
- SVNRevisionCommit.__setstate__(self, rev_state)
-
- def get_cvs_items(self):
- # It might seem that we should return
- # SVNRevisionCommit.get_cvs_items(self) here, but this commit
- # doesn't really include those CVSItems, but rather followup
- # commits to those.
- return []
-
- def get_log_msg(self):
- """Return a manufactured log message for this commit."""
-
- return self.text_wrapper.fill(
- Ctx().post_commit_message % {'revnum' : self.motivating_revnum}
- )
-
- def get_description(self):
- return 'post-commit default branch(es)'
-
- def output(self, output_option):
- output_option.process_post_commit(self)
-
-
-class SVNSymbolCommit(SVNCommit):
- def __init__(self, symbol, cvs_symbol_ids, date, revnum):
- SVNCommit.__init__(self, date, revnum)
-
- # The TypedSymbol that is filled in this SVNCommit.
- self.symbol = symbol
-
- self.cvs_symbol_ids = cvs_symbol_ids
-
- def __getstate__(self):
- return (
- SVNCommit.__getstate__(self),
- self.symbol.id, self.cvs_symbol_ids,
- )
-
- def __setstate__(self, state):
- (svn_commit_state, symbol_id, self.cvs_symbol_ids) = state
- SVNCommit.__setstate__(self, svn_commit_state)
- self.symbol = Ctx()._symbol_db.get_symbol(symbol_id)
-
- def get_cvs_items(self):
- return [
- cvs_symbol
- for (id, cvs_symbol)
- in Ctx()._cvs_items_db.get_many(self.cvs_symbol_ids)
- ]
-
- def _get_symbol_type(self):
- """Return the type of the self.symbol ('branch' or 'tag')."""
-
- raise NotImplementedError()
-
- def get_author(self):
- return Ctx().username
-
- def get_log_msg(self):
- """Return a manufactured log message for this commit."""
-
- return self.text_wrapper.fill(
- Ctx().symbol_commit_message % {
- 'symbol_type' : self._get_symbol_type(),
- 'symbol_name' : self.symbol.name,
- }
- )
-
- def get_description(self):
- return 'copying to %s %r' % (self._get_symbol_type(), self.symbol.name,)
-
- def __str__(self):
- """ Print a human-readable description of this SVNCommit.
-
- This description is not intended to be machine-parseable."""
-
- return (
- SVNCommit.__str__(self)
- + " symbolic name: %s\n" % (self.symbol.name,)
- )
-
-
-class SVNBranchCommit(SVNSymbolCommit):
- def __init__(self, symbol, cvs_symbol_ids, date, revnum):
- if not isinstance(symbol, Branch):
- raise InternalError('Incorrect symbol type %r' % (symbol,))
-
- SVNSymbolCommit.__init__(self, symbol, cvs_symbol_ids, date, revnum)
-
- def _get_symbol_type(self):
- return 'branch'
-
- def output(self, output_option):
- output_option.process_branch_commit(self)
-
-
-class SVNTagCommit(SVNSymbolCommit):
- def __init__(self, symbol, cvs_symbol_ids, date, revnum):
- if not isinstance(symbol, Tag):
- raise InternalError('Incorrect symbol type %r' % (symbol,))
-
- SVNSymbolCommit.__init__(self, symbol, cvs_symbol_ids, date, revnum)
-
- def _get_symbol_type(self):
- return 'tag'
-
- def output(self, output_option):
- output_option.process_tag_commit(self)
-
-
diff --git a/cvs2svn_lib/svn_commit_creator.py b/cvs2svn_lib/svn_commit_creator.py
deleted file mode 100644
index c87db38..0000000
--- a/cvs2svn_lib/svn_commit_creator.py
+++ /dev/null
@@ -1,217 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains the SVNCommitCreator class."""
-
-
-import time
-
-from cvs2svn_lib.common import InternalError
-from cvs2svn_lib.log import Log
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.cvs_item import CVSRevisionNoop
-from cvs2svn_lib.cvs_item import CVSBranchNoop
-from cvs2svn_lib.cvs_item import CVSTagNoop
-from cvs2svn_lib.changeset import OrderedChangeset
-from cvs2svn_lib.changeset import BranchChangeset
-from cvs2svn_lib.changeset import TagChangeset
-from cvs2svn_lib.svn_commit import SVNInitialProjectCommit
-from cvs2svn_lib.svn_commit import SVNPrimaryCommit
-from cvs2svn_lib.svn_commit import SVNPostCommit
-from cvs2svn_lib.svn_commit import SVNBranchCommit
-from cvs2svn_lib.svn_commit import SVNTagCommit
-from cvs2svn_lib.key_generator import KeyGenerator
-
-
-class SVNCommitCreator:
- """This class creates and yields SVNCommits via process_changeset()."""
-
- def __init__(self):
- # The revision number to assign to the next new SVNCommit.
- self.revnum_generator = KeyGenerator()
-
- # A set containing the Projects that have already been
- # initialized:
- self._initialized_projects = set()
-
- def _post_commit(self, cvs_revs, motivating_revnum, timestamp):
- """Generate any SVNCommits needed to follow CVS_REVS.
-
- That is, handle non-trunk default branches. A revision on a CVS
- non-trunk default branch is visible in a default CVS checkout of
- HEAD. So we copy such commits over to Subversion's trunk so that
- checking out SVN trunk gives the same output as checking out of
- CVS's default branch."""
-
- cvs_revs = [
- cvs_rev
- for cvs_rev in cvs_revs
- if cvs_rev.ntdbr and not isinstance(cvs_rev, CVSRevisionNoop)
- ]
-
- if cvs_revs:
- cvs_revs.sort(
- lambda a, b: cmp(a.cvs_file.filename, b.cvs_file.filename)
- )
- # Generate an SVNCommit for all of our default branch cvs_revs.
- yield SVNPostCommit(
- motivating_revnum, cvs_revs, timestamp,
- self.revnum_generator.gen_id(),
- )
-
- def _process_revision_changeset(self, changeset, timestamp):
- """Process CHANGESET, using TIMESTAMP as the commit time.
-
- Create and yield one or more SVNCommits in the process. CHANGESET
- must be an OrderedChangeset. TIMESTAMP is used as the timestamp
- for any resulting SVNCommits."""
-
- if not changeset.cvs_item_ids:
- Log().warn('Changeset has no items: %r' % changeset)
- return
-
- Log().verbose('-' * 60)
- Log().verbose('CVS Revision grouping:')
- Log().verbose(' Time: %s' % time.ctime(timestamp))
-
- # Generate an SVNCommit unconditionally. Even if the only change in
- # this group of CVSRevisions is a deletion of an already-deleted
- # file (that is, a CVS revision in state 'dead' whose predecessor
- # was also in state 'dead'), the conversion will still generate a
- # Subversion revision containing the log message for the second dead
- # revision, because we don't want to lose that information.
-
- cvs_revs = list(changeset.iter_cvs_items())
- if cvs_revs:
- cvs_revs.sort(lambda a, b: cmp(a.cvs_file.filename, b.cvs_file.filename))
- svn_commit = SVNPrimaryCommit(
- cvs_revs, timestamp, self.revnum_generator.gen_id()
- )
-
- yield svn_commit
-
- for cvs_rev in cvs_revs:
- Ctx()._symbolings_logger.log_revision(cvs_rev, svn_commit.revnum)
-
- # Generate an SVNPostCommit if we have default branch revs. If
- # some of the revisions in this commit happened on a non-trunk
- # default branch, then those files have to be copied into trunk
- # manually after being changed on the branch (because the RCS
- # "default branch" appears as head, i.e., trunk, in practice).
- # Unfortunately, Subversion doesn't support copies with sources
- # in the current txn. All copies must be based in committed
- # revisions. Therefore, we generate the copies in a new
- # revision.
- for svn_post_commit in self._post_commit(
- cvs_revs, svn_commit.revnum, timestamp
- ):
- yield svn_post_commit
-
- def _process_tag_changeset(self, changeset, timestamp):
- """Process TagChangeset CHANGESET, producing a SVNTagCommit.
-
- Filter out CVSTagNoops. If no CVSTags are left, don't generate a
- SVNTagCommit."""
-
- if Ctx().trunk_only:
- raise InternalError(
- 'TagChangeset encountered during a --trunk-only conversion')
-
- cvs_tag_ids = [
- cvs_tag.id
- for cvs_tag in changeset.iter_cvs_items()
- if not isinstance(cvs_tag, CVSTagNoop)
- ]
- if cvs_tag_ids:
- yield SVNTagCommit(
- changeset.symbol, cvs_tag_ids, timestamp,
- self.revnum_generator.gen_id(),
- )
- else:
- Log().debug(
- 'Omitting %r because it contains only CVSTagNoops' % (changeset,)
- )
-
- def _process_branch_changeset(self, changeset, timestamp):
- """Process BranchChangeset CHANGESET, producing a SVNBranchCommit.
-
- Filter out CVSBranchNoops. If no CVSBranches are left, don't
- generate a SVNBranchCommit."""
-
- if Ctx().trunk_only:
- raise InternalError(
- 'BranchChangeset encountered during a --trunk-only conversion')
-
- cvs_branches = [
- cvs_branch
- for cvs_branch in changeset.iter_cvs_items()
- if not isinstance(cvs_branch, CVSBranchNoop)
- ]
- if cvs_branches:
- svn_commit = SVNBranchCommit(
- changeset.symbol,
- [cvs_branch.id for cvs_branch in cvs_branches],
- timestamp,
- self.revnum_generator.gen_id(),
- )
- yield svn_commit
- for cvs_branch in cvs_branches:
- Ctx()._symbolings_logger.log_branch_revision(
- cvs_branch, svn_commit.revnum
- )
- else:
- Log().debug(
- 'Omitting %r because it contains only CVSBranchNoops' % (changeset,)
- )
-
- def process_changeset(self, changeset, timestamp):
- """Process CHANGESET, using TIMESTAMP for all of its entries.
-
- Return a generator that generates the resulting SVNCommits.
-
- The changesets must be fed to this function in proper dependency
- order."""
-
- # First create any new projects that might be opened by the
- # changeset:
- projects_opened = \
- changeset.get_projects_opened() - self._initialized_projects
- if projects_opened:
- if Ctx().cross_project_commits:
- yield SVNInitialProjectCommit(
- timestamp, projects_opened, self.revnum_generator.gen_id()
- )
- else:
- for project in projects_opened:
- yield SVNInitialProjectCommit(
- timestamp, [project], self.revnum_generator.gen_id()
- )
- self._initialized_projects.update(projects_opened)
-
- if isinstance(changeset, OrderedChangeset):
- for svn_commit \
- in self._process_revision_changeset(changeset, timestamp):
- yield svn_commit
- elif isinstance(changeset, TagChangeset):
- for svn_commit in self._process_tag_changeset(changeset, timestamp):
- yield svn_commit
- elif isinstance(changeset, BranchChangeset):
- for svn_commit in self._process_branch_changeset(changeset, timestamp):
- yield svn_commit
- else:
- raise TypeError('Illegal changeset %r' % changeset)
-
-
diff --git a/cvs2svn_lib/svn_commit_item.py b/cvs2svn_lib/svn_commit_item.py
deleted file mode 100644
index 8bc9015..0000000
--- a/cvs2svn_lib/svn_commit_item.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains class SVNCommitItem."""
-
-
-from cvs2svn_lib.context import Ctx
-
-
-class SVNCommitItem:
- """A wrapper class for CVSRevision objects upon which
- Subversion-related data (such as properties) may be hung."""
-
- def __init__(self, cvs_rev, svn_props_changed):
- """Initialize instance and record the properties for this file.
- SVN_PROPS_CHANGED indicates whether the svn: properties are known
- to have changed since the last revision.
-
- The properties are set by the SVNPropertySetters in
- Ctx().svn_property_setters."""
-
- self.cvs_rev = cvs_rev
- # Did the svn properties change for this file (i.e., do they have
- # to be written to the dumpfile?)
- self.svn_props_changed = svn_props_changed
-
- # The properties for this item as a map { key : value }. If VALUE
- # is None, the property should be left unset.
- self.svn_props = { }
-
- for svn_property_setter in Ctx().svn_property_setters:
- svn_property_setter.set_properties(self)
-
- def has_keywords(self):
- return bool(self.svn_props.get('svn:keywords', None))
-
-
diff --git a/cvs2svn_lib/svn_output_option.py b/cvs2svn_lib/svn_output_option.py
deleted file mode 100644
index 86d1ba4..0000000
--- a/cvs2svn_lib/svn_output_option.py
+++ /dev/null
@@ -1,753 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""Classes for outputting the converted repository to SVN."""
-
-
-import os
-
-from cvs2svn_lib import config
-from cvs2svn_lib.common import InternalError
-from cvs2svn_lib.common import FatalError
-from cvs2svn_lib.common import FatalException
-from cvs2svn_lib.common import error_prefix
-from cvs2svn_lib.common import format_date
-from cvs2svn_lib.common import PathsNotDisjointException
-from cvs2svn_lib.common import verify_paths_disjoint
-from cvs2svn_lib.log import Log
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.artifact_manager import artifact_manager
-from cvs2svn_lib.process import CommandFailedException
-from cvs2svn_lib.process import check_command_runs
-from cvs2svn_lib.process import call_command
-from cvs2svn_lib.cvs_file import CVSDirectory
-from cvs2svn_lib.symbol import Trunk
-from cvs2svn_lib.symbol import LineOfDevelopment
-from cvs2svn_lib.cvs_item import CVSRevisionAdd
-from cvs2svn_lib.cvs_item import CVSRevisionChange
-from cvs2svn_lib.cvs_item import CVSRevisionDelete
-from cvs2svn_lib.cvs_item import CVSRevisionNoop
-from cvs2svn_lib.repository_mirror import RepositoryMirror
-from cvs2svn_lib.repository_mirror import PathExistsError
-from cvs2svn_lib.svn_commit_item import SVNCommitItem
-from cvs2svn_lib.openings_closings import SymbolingsReader
-from cvs2svn_lib.fill_source import get_source_set
-from cvs2svn_lib.stdout_delegate import StdoutDelegate
-from cvs2svn_lib.dumpfile_delegate import DumpfileDelegate
-from cvs2svn_lib.repository_delegate import RepositoryDelegate
-from cvs2svn_lib.output_option import OutputOption
-
-
-class SVNOutputOption(OutputOption):
- """An OutputOption appropriate for output to Subversion."""
-
- class ParentMissingError(Exception):
- """The parent of a path is missing.
-
- Exception raised if an attempt is made to add a path to the
- repository mirror but the parent's path doesn't exist in the
- youngest revision of the repository."""
-
- pass
-
- class ExpectedDirectoryError(Exception):
- """A file was found where a directory was expected."""
-
- pass
-
- def __init__(self, author_transforms=None):
- self._mirror = RepositoryMirror()
-
- def to_utf8(s):
- if isinstance(s, unicode):
- return s.encode('utf8')
- else:
- return s
-
- self.author_transforms = {}
- if author_transforms is not None:
- for (cvsauthor, name) in author_transforms.iteritems():
- cvsauthor = to_utf8(cvsauthor)
- name = to_utf8(name)
- self.author_transforms[cvsauthor] = name
-
- def register_artifacts(self, which_pass):
- # These artifacts are needed for SymbolingsReader:
- artifact_manager.register_temp_file_needed(
- config.SYMBOL_OPENINGS_CLOSINGS_SORTED, which_pass
- )
- artifact_manager.register_temp_file_needed(
- config.SYMBOL_OFFSETS_DB, which_pass
- )
-
- self._mirror.register_artifacts(which_pass)
- Ctx().revision_reader.register_artifacts(which_pass)
-
- def check_symbols(self, symbol_map):
- """Check that the paths of all included LODs are set and disjoint."""
-
- error_found = False
-
- # Check that all included LODs have their base paths set, and
- # collect the paths into a list:
- paths = []
- for lod in symbol_map.itervalues():
- if isinstance(lod, LineOfDevelopment):
- if lod.base_path is None:
- Log().error('%s: No path was set for %r\n' % (error_prefix, lod,))
- error_found = True
- else:
- paths.append(lod.base_path)
-
- # Check that the SVN paths of all LODS are disjoint:
- try:
- verify_paths_disjoint(*paths)
- except PathsNotDisjointException, e:
- Log().error(str(e))
- error_found = True
-
- if error_found:
- raise FatalException(
- 'Please fix the above errors and restart CollateSymbolsPass'
- )
-
- def setup(self, svn_rev_count):
- self._symbolings_reader = SymbolingsReader()
- self._mirror.open()
- self._delegates = []
- Ctx().revision_reader.start()
- self.add_delegate(StdoutDelegate(svn_rev_count))
-
- def _get_author(self, svn_commit):
- author = svn_commit.get_author()
- name = self.author_transforms.get(author, author)
- return name
-
- def _get_revprops(self, svn_commit):
- """Return the Subversion revprops for this SVNCommit."""
-
- return {
- 'svn:author' : self._get_author(svn_commit),
- 'svn:log' : svn_commit.get_log_msg(),
- 'svn:date' : format_date(svn_commit.date),
- }
-
- def start_commit(self, revnum, revprops):
- """Start a new commit."""
-
- self._mirror.start_commit(revnum)
- self._invoke_delegates('start_commit', revnum, revprops)
-
- def end_commit(self):
- """Called at the end of each commit.
-
- This method copies the newly created nodes to the on-disk nodes
- db."""
-
- self._mirror.end_commit()
- self._invoke_delegates('end_commit')
-
- def delete_lod(self, lod):
- """Delete the main path for LOD from the tree.
-
- The path must currently exist. Silently refuse to delete trunk
- paths."""
-
- if isinstance(lod, Trunk):
- # Never delete a Trunk path.
- return
-
- self._mirror.get_current_lod_directory(lod).delete()
- self._invoke_delegates('delete_lod', lod)
-
- def delete_path(self, cvs_path, lod, should_prune=False):
- """Delete CVS_PATH from LOD."""
-
- if cvs_path.parent_directory is None:
- self.delete_lod(lod)
- return
-
- parent_node = self._mirror.get_current_path(
- cvs_path.parent_directory, lod
- )
- del parent_node[cvs_path]
- self._invoke_delegates('delete_path', lod, cvs_path)
-
- if should_prune:
- while parent_node is not None and len(parent_node) == 0:
- # A drawback of this code is that we issue a delete for each
- # path and not just a single delete for the topmost directory
- # pruned.
- node = parent_node
- cvs_path = node.cvs_path
- if cvs_path.parent_directory is None:
- parent_node = None
- self.delete_lod(lod)
- else:
- parent_node = node.parent_mirror_dir
- node.delete()
- self._invoke_delegates('delete_path', lod, cvs_path)
-
- def initialize_project(self, project):
- """Create the basic structure for PROJECT."""
-
- self._invoke_delegates('initialize_project', project)
-
- # Don't invoke delegates.
- self._mirror.add_lod(project.get_trunk())
-
- def change_path(self, cvs_rev):
- """Register a change in self._youngest for the CVS_REV's svn_path."""
-
- # We do not have to update the nodes because our mirror is only
- # concerned with the presence or absence of paths, and a file
- # content change does not cause any path changes.
- self._invoke_delegates('change_path', SVNCommitItem(cvs_rev, False))
-
- def _mkdir_p(self, cvs_directory, lod):
- """Make sure that CVS_DIRECTORY exists in LOD.
-
- If not, create it, calling delegates. Return the node for
- CVS_DIRECTORY."""
-
- try:
- node = self._mirror.get_current_lod_directory(lod)
- except KeyError:
- node = self._mirror.add_lod(lod)
- self._invoke_delegates('initialize_lod', lod)
-
- for sub_path in cvs_directory.get_ancestry()[1:]:
- try:
- node = node[sub_path]
- except KeyError:
- node = node.mkdir(sub_path)
- self._invoke_delegates('mkdir', lod, sub_path)
- if node is None:
- raise self.ExpectedDirectoryError(
- 'File found at \'%s\' where directory was expected.' % (sub_path,)
- )
-
- return node
-
- def add_path(self, cvs_rev):
- """Add the CVS_REV's svn_path to the repository mirror.
-
- Create any missing intermediate paths."""
-
- cvs_file = cvs_rev.cvs_file
- parent_path = cvs_file.parent_directory
- lod = cvs_rev.lod
- parent_node = self._mkdir_p(parent_path, lod)
- parent_node.add_file(cvs_file)
- self._invoke_delegates('add_path', SVNCommitItem(cvs_rev, True))
-
- def copy_lod(self, src_lod, dest_lod, src_revnum):
- """Copy all of SRC_LOD at SRC_REVNUM to DST_LOD.
-
- In the youngest revision of the repository, the destination LOD
- *must not* already exist.
-
- Return the new node at DEST_LOD. Note that this node is not
- necessarily writable, though its parent node necessarily is."""
-
- node = self._mirror.copy_lod(src_lod, dest_lod, src_revnum)
- self._invoke_delegates('copy_lod', src_lod, dest_lod, src_revnum)
- return node
-
- def copy_path(
- self, cvs_path, src_lod, dest_lod, src_revnum, create_parent=False
- ):
- """Copy CVS_PATH from SRC_LOD at SRC_REVNUM to DST_LOD.
-
- In the youngest revision of the repository, the destination's
- parent *must* exist unless CREATE_PARENT is specified. But the
- destination itself *must not* exist.
-
- Return the new node at (CVS_PATH, DEST_LOD), as a
- CurrentMirrorDirectory."""
-
- if cvs_path.parent_directory is None:
- return self.copy_lod(src_lod, dest_lod, src_revnum)
-
- # Get the node of our source, or None if it is a file:
- src_node = self._mirror.get_old_path(cvs_path, src_lod, src_revnum)
-
- # Get the parent path of the destination:
- if create_parent:
- dest_parent_node = self._mkdir_p(cvs_path.parent_directory, dest_lod)
- else:
- try:
- dest_parent_node = self._mirror.get_current_path(
- cvs_path.parent_directory, dest_lod
- )
- except KeyError:
- raise self.ParentMissingError(
- 'Attempt to add path \'%s\' to repository mirror, '
- 'but its parent directory doesn\'t exist in the mirror.'
- % (dest_lod.get_path(cvs_path.cvs_path),)
- )
-
- if cvs_path in dest_parent_node:
- raise PathExistsError(
- 'Attempt to add path \'%s\' to repository mirror '
- 'when it already exists in the mirror.'
- % (dest_lod.get_path(cvs_path.cvs_path),)
- )
-
- dest_parent_node[cvs_path] = src_node
- self._invoke_delegates(
- 'copy_path',
- cvs_path, src_lod, dest_lod, src_revnum
- )
-
- return dest_parent_node[cvs_path]
-
- def fill_symbol(self, svn_symbol_commit, fill_source):
- """Perform all copies for the CVSSymbols in SVN_SYMBOL_COMMIT.
-
- The symbolic name is guaranteed to exist in the Subversion
- repository by the end of this call, even if there are no paths
- under it."""
-
- symbol = svn_symbol_commit.symbol
-
- try:
- dest_node = self._mirror.get_current_lod_directory(symbol)
- except KeyError:
- self._fill_directory(symbol, None, fill_source, None)
- else:
- self._fill_directory(symbol, dest_node, fill_source, None)
-
- def _fill_directory(self, symbol, dest_node, fill_source, parent_source):
- """Fill the tag or branch SYMBOL at the path indicated by FILL_SOURCE.
-
- Use items from FILL_SOURCE, and recurse into the child items.
-
- Fill SYMBOL starting at the path FILL_SOURCE.cvs_path. DEST_NODE
- is the node of this destination path, or None if the destination
- does not yet exist. All directories above this path have already
- been filled. FILL_SOURCE is a FillSource instance describing the
- items within a subtree of the repository that still need to be
- copied to the destination.
-
- PARENT_SOURCE is the SVNRevisionRange that was used to copy the
- parent directory, if it was copied in this commit. We prefer to
- copy from the same source as was used for the parent, since it
- typically requires less touching-up. If PARENT_SOURCE is None,
- then the parent directory was not copied in this commit, so no
- revision is preferable to any other."""
-
- copy_source = fill_source.compute_best_source(parent_source)
-
- # Figure out if we shall copy to this destination and delete any
- # destination path that is in the way.
- if dest_node is None:
- # The destination does not exist at all, so it definitely has to
- # be copied:
- dest_node = self.copy_path(
- fill_source.cvs_path, copy_source.source_lod,
- symbol, copy_source.opening_revnum
- )
- elif (parent_source is not None) and (
- copy_source.source_lod != parent_source.source_lod
- or copy_source.opening_revnum != parent_source.opening_revnum
- ):
- # The parent path was copied from a different source than we
- # need to use, so we have to delete the version that was copied
- # with the parent then re-copy from the correct source:
- self.delete_path(fill_source.cvs_path, symbol)
- dest_node = self.copy_path(
- fill_source.cvs_path, copy_source.source_lod,
- symbol, copy_source.opening_revnum
- )
- else:
- copy_source = parent_source
-
- # The map {CVSPath : FillSource} of entries within this directory
- # that need filling:
- src_entries = fill_source.get_subsource_map()
-
- if copy_source is not None:
- self._prune_extra_entries(
- fill_source.cvs_path, symbol, dest_node, src_entries
- )
-
- return self._cleanup_filled_directory(
- symbol, dest_node, src_entries, copy_source
- )
-
- def _cleanup_filled_directory(
- self, symbol, dest_node, src_entries, copy_source
- ):
- """The directory at DEST_NODE has been filled and pruned; recurse.
-
- Recurse into the SRC_ENTRIES, in alphabetical order. If DEST_NODE
- was copied in this revision, COPY_SOURCE should indicate where it
- was copied from; otherwise, COPY_SOURCE should be None."""
-
- cvs_paths = src_entries.keys()
- cvs_paths.sort()
- for cvs_path in cvs_paths:
- if isinstance(cvs_path, CVSDirectory):
- # Path is a CVSDirectory:
- try:
- dest_subnode = dest_node[cvs_path]
- except KeyError:
- # Path doesn't exist yet; it has to be created:
- dest_node = self._fill_directory(
- symbol, None, src_entries[cvs_path], None
- ).parent_mirror_dir
- else:
- # Path already exists, but might have to be cleaned up:
- dest_node = self._fill_directory(
- symbol, dest_subnode, src_entries[cvs_path], copy_source
- ).parent_mirror_dir
- else:
- # Path is a CVSFile:
- self._fill_file(
- symbol, cvs_path in dest_node, src_entries[cvs_path], copy_source
- )
- # Reread dest_node since the call to _fill_file() might have
- # made it writable:
- dest_node = self._mirror.get_current_path(
- dest_node.cvs_path, dest_node.lod
- )
-
- return dest_node
-
- def _fill_file(self, symbol, dest_existed, fill_source, parent_source):
- """Fill the tag or branch SYMBOL at the path indicated by FILL_SOURCE.
-
- Use items from FILL_SOURCE.
-
- Fill SYMBOL at path FILL_SOURCE.cvs_path. DEST_NODE is the node
- of this destination path, or None if the destination does not yet
- exist. All directories above this path have already been filled
- as needed. FILL_SOURCE is a FillSource instance describing the
- item that needs to be copied to the destination.
-
- PARENT_SOURCE is the source from which the parent directory was
- copied, or None if the parent directory was not copied during this
- commit. We prefer to copy from PARENT_SOURCE, since it typically
- requires less touching-up. If PARENT_SOURCE is None, then the
- parent directory was not copied in this commit, so no revision is
- preferable to any other."""
-
- copy_source = fill_source.compute_best_source(parent_source)
-
- # Figure out if we shall copy to this destination and delete any
- # destination path that is in the way.
- if not dest_existed:
- # The destination does not exist at all, so it definitely has to
- # be copied:
- self.copy_path(
- fill_source.cvs_path, copy_source.source_lod,
- symbol, copy_source.opening_revnum
- )
- elif (parent_source is not None) and (
- copy_source.source_lod != parent_source.source_lod
- or copy_source.opening_revnum != parent_source.opening_revnum
- ):
- # The parent path was copied from a different source than we
- # need to use, so we have to delete the version that was copied
- # with the parent and then re-copy from the correct source:
- self.delete_path(fill_source.cvs_path, symbol)
- self.copy_path(
- fill_source.cvs_path, copy_source.source_lod,
- symbol, copy_source.opening_revnum
- )
-
- def _prune_extra_entries(
- self, dest_cvs_path, symbol, dest_node, src_entries
- ):
- """Delete any entries in DEST_NODE that are not in SRC_ENTRIES."""
-
- delete_list = [
- cvs_path
- for cvs_path in dest_node
- if cvs_path not in src_entries
- ]
-
- # Sort the delete list so that the output is in a consistent
- # order:
- delete_list.sort()
- for cvs_path in delete_list:
- del dest_node[cvs_path]
- self._invoke_delegates('delete_path', symbol, cvs_path)
-
- def add_delegate(self, delegate):
- """Adds DELEGATE to self._delegates.
-
- For every delegate you add, whenever a repository action method is
- performed, delegate's corresponding repository action method is
- called. Multiple delegates will be called in the order that they
- are added. See SVNRepositoryDelegate for more information."""
-
- self._delegates.append(delegate)
-
- def _invoke_delegates(self, method, *args):
- """Invoke a method on each delegate.
-
- Iterate through each of our delegates, in the order that they were
- added, and call the delegate's method named METHOD with the
- arguments in ARGS."""
-
- for delegate in self._delegates:
- getattr(delegate, method)(*args)
-
- def process_initial_project_commit(self, svn_commit):
- self.start_commit(svn_commit.revnum, self._get_revprops(svn_commit))
-
- for project in svn_commit.projects:
- self.initialize_project(project)
-
- self.end_commit()
-
- def process_primary_commit(self, svn_commit):
- self.start_commit(svn_commit.revnum, self._get_revprops(svn_commit))
-
- # This actually commits CVSRevisions
- if len(svn_commit.cvs_revs) > 1:
- plural = "s"
- else:
- plural = ""
- Log().verbose("Committing %d CVSRevision%s"
- % (len(svn_commit.cvs_revs), plural))
- for cvs_rev in svn_commit.cvs_revs:
- if isinstance(cvs_rev, CVSRevisionNoop):
- pass
-
- elif isinstance(cvs_rev, CVSRevisionDelete):
- self.delete_path(cvs_rev.cvs_file, cvs_rev.lod, Ctx().prune)
-
- elif isinstance(cvs_rev, CVSRevisionAdd):
- self.add_path(cvs_rev)
-
- elif isinstance(cvs_rev, CVSRevisionChange):
- self.change_path(cvs_rev)
-
- self.end_commit()
-
- def process_post_commit(self, svn_commit):
- self.start_commit(svn_commit.revnum, self._get_revprops(svn_commit))
-
- Log().verbose(
- 'Synchronizing default branch motivated by %d'
- % (svn_commit.motivating_revnum,)
- )
-
- for cvs_rev in svn_commit.cvs_revs:
- trunk = cvs_rev.cvs_file.project.get_trunk()
- if isinstance(cvs_rev, CVSRevisionAdd):
- # Copy from branch to trunk:
- self.copy_path(
- cvs_rev.cvs_file, cvs_rev.lod, trunk,
- svn_commit.motivating_revnum, True
- )
- elif isinstance(cvs_rev, CVSRevisionChange):
- # Delete old version of the path on trunk...
- self.delete_path(cvs_rev.cvs_file, trunk)
- # ...and copy the new version over from branch:
- self.copy_path(
- cvs_rev.cvs_file, cvs_rev.lod, trunk,
- svn_commit.motivating_revnum, True
- )
- elif isinstance(cvs_rev, CVSRevisionDelete):
- # Delete trunk path:
- self.delete_path(cvs_rev.cvs_file, trunk)
- elif isinstance(cvs_rev, CVSRevisionNoop):
- # Do nothing
- pass
- else:
- raise InternalError('Unexpected CVSRevision type: %s' % (cvs_rev,))
-
- self.end_commit()
-
- def process_branch_commit(self, svn_commit):
- self.start_commit(svn_commit.revnum, self._get_revprops(svn_commit))
- Log().verbose('Filling branch:', svn_commit.symbol.name)
-
- # Get the set of sources for the symbolic name:
- source_set = get_source_set(
- svn_commit.symbol,
- self._symbolings_reader.get_range_map(svn_commit),
- )
-
- self.fill_symbol(svn_commit, source_set)
-
- self.end_commit()
-
- def process_tag_commit(self, svn_commit):
- self.start_commit(svn_commit.revnum, self._get_revprops(svn_commit))
- Log().verbose('Filling tag:', svn_commit.symbol.name)
-
- # Get the set of sources for the symbolic name:
- source_set = get_source_set(
- svn_commit.symbol,
- self._symbolings_reader.get_range_map(svn_commit),
- )
-
- self.fill_symbol(svn_commit, source_set)
-
- self.end_commit()
-
- def cleanup(self):
- self._invoke_delegates('finish')
- self._mirror.close()
- self._mirror = None
- Ctx().revision_reader.finish()
- self._symbolings_reader.close()
- del self._symbolings_reader
-
-
-class DumpfileOutputOption(SVNOutputOption):
- """Output the result of the conversion into a dumpfile."""
-
- def __init__(self, dumpfile_path, author_transforms=None):
- SVNOutputOption.__init__(self, author_transforms)
- self.dumpfile_path = dumpfile_path
-
- def check(self):
- pass
-
- def setup(self, svn_rev_count):
- Log().quiet("Starting Subversion Dumpfile.")
- SVNOutputOption.setup(self, svn_rev_count)
- if not Ctx().dry_run:
- self.add_delegate(
- DumpfileDelegate(Ctx().revision_reader, self.dumpfile_path)
- )
-
-
-class RepositoryOutputOption(SVNOutputOption):
- """Output the result of the conversion into an SVN repository."""
-
- def __init__(self, target, author_transforms=None):
- SVNOutputOption.__init__(self, author_transforms)
- self.target = target
-
- def check(self):
- if not Ctx().dry_run:
- # Verify that svnadmin can be executed. The 'help' subcommand
- # should be harmless.
- try:
- check_command_runs([Ctx().svnadmin_executable, 'help'], 'svnadmin')
- except CommandFailedException, e:
- raise FatalError(
- '%s\n'
- 'svnadmin could not be executed. Please ensure that it is\n'
- 'installed and/or use the --svnadmin option.' % (e,))
-
- def setup(self, svn_rev_count):
- Log().quiet("Starting Subversion Repository.")
- SVNOutputOption.setup(self, svn_rev_count)
- if not Ctx().dry_run:
- self.add_delegate(
- RepositoryDelegate(Ctx().revision_reader, self.target)
- )
-
-
-class NewRepositoryOutputOption(RepositoryOutputOption):
- """Output the result of the conversion into a new SVN repository."""
-
- def __init__(
- self, target, fs_type=None, bdb_txn_nosync=None, author_transforms=None, create_options=[]
- ):
- RepositoryOutputOption.__init__(self, target, author_transforms)
- self.bdb_txn_nosync = bdb_txn_nosync
-
- # Determine the options to be passed to "svnadmin create":
- if not fs_type:
- # User didn't say what kind repository (bdb, fsfs, etc). We
- # still pass --bdb-txn-nosync. It's a no-op if the default
- # repository type doesn't support it, but we definitely want it
- # if BDB is the default.
- self.create_options = ['--bdb-txn-nosync']
- elif fs_type == 'bdb':
- # User explicitly specified bdb.
- #
- # Since this is a BDB repository, pass --bdb-txn-nosync, because
- # it gives us a 4-5x speed boost (if cvs2svn is creating the
- # repository, cvs2svn should be the only program accessing the
- # svn repository until cvs2svn is done). But we'll turn no-sync
- # off in self.finish(), unless instructed otherwise.
- self.create_options = ['--fs-type=bdb', '--bdb-txn-nosync']
- else:
- # User specified something other than bdb.
- self.create_options = ['--fs-type=%s' % fs_type]
-
- # Now append the user's explicitly-set create options:
- self.create_options += create_options
-
- def check(self):
- RepositoryOutputOption.check(self)
- if not Ctx().dry_run and os.path.exists(self.target):
- raise FatalError("the svn-repos-path '%s' exists.\n"
- "Remove it, or pass '--existing-svnrepos'."
- % self.target)
-
- def setup(self, svn_rev_count):
- Log().normal("Creating new repository '%s'" % (self.target))
- if Ctx().dry_run:
- # Do not actually create repository:
- pass
- else:
- call_command([
- Ctx().svnadmin_executable, 'create',
- ] + self.create_options + [
- self.target
- ])
-
- RepositoryOutputOption.setup(self, svn_rev_count)
-
- def cleanup(self):
- RepositoryOutputOption.cleanup(self)
-
- # If this is a BDB repository, and we created the repository, and
- # --bdb-no-sync wasn't passed, then comment out the DB_TXN_NOSYNC
- # line in the DB_CONFIG file, because txn syncing should be on by
- # default in BDB repositories.
- #
- # We determine if this is a BDB repository by looking for the
- # DB_CONFIG file, which doesn't exist in FSFS, rather than by
- # checking self.fs_type. That way this code will Do The Right
- # Thing in all circumstances.
- db_config = os.path.join(self.target, "db/DB_CONFIG")
- if Ctx().dry_run:
- # Do not change repository:
- pass
- elif not self.bdb_txn_nosync and os.path.exists(db_config):
- no_sync = 'set_flags DB_TXN_NOSYNC\n'
-
- contents = open(db_config, 'r').readlines()
- index = contents.index(no_sync)
- contents[index] = '# ' + no_sync
- open(db_config, 'w').writelines(contents)
-
-
-class ExistingRepositoryOutputOption(RepositoryOutputOption):
- """Output the result of the conversion into an existing SVN repository."""
-
- def __init__(self, target, author_transforms=None):
- RepositoryOutputOption.__init__(self, target, author_transforms)
-
- def check(self):
- RepositoryOutputOption.check(self)
- if not os.path.isdir(self.target):
- raise FatalError("the svn-repos-path '%s' is not an "
- "existing directory." % self.target)
-
-
diff --git a/cvs2svn_lib/svn_repository_delegate.py b/cvs2svn_lib/svn_repository_delegate.py
deleted file mode 100644
index 00c4a01..0000000
--- a/cvs2svn_lib/svn_repository_delegate.py
+++ /dev/null
@@ -1,121 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains the SVNRepositoryDelegate class."""
-
-
-class SVNRepositoryDelegate:
- """Abstract superclass for any delegate to SVNOutputOption.
-
- Subclasses must implement all of the methods below.
-
- For each method, a subclass implements, in its own way, the
- Subversion operation implied by the method's name. For example, for
- the add_path method, the DumpfileDelegate would write out a
- 'Node-add:' command to a Subversion dumpfile, the StdoutDelegate
- would merely print that the path is being added to the repository,
- and the RepositoryDelegate would actually cause the path to be added
- to the Subversion repository that it is creating."""
-
- def start_commit(self, revnum, revprops):
- """An SVN commit is starting.
-
- Perform any actions needed to start an SVN commit with revision
- number REVNUM and revision properties REVPROPS."""
-
- raise NotImplementedError()
-
- def end_commit(self):
- """An SVN commit is ending."""
-
- raise NotImplementedError()
-
- def initialize_project(self, project):
- """Initialize PROJECT.
-
- For Subversion, this means to create the trunk, branches, and tags
- directories for PROJECT."""
-
- raise NotImplementedError()
-
- def initialize_lod(self, lod):
- """Initialize LOD with no contents.
-
- LOD is an instance of LineOfDevelopment. It is also possible for
- an LOD to be created by copying from another LOD; such events are
- indicated via the copy_lod() callback."""
-
- raise NotImplementedError()
-
- def mkdir(self, lod, cvs_directory):
- """Create CVS_DIRECTORY within LOD.
-
- LOD is a LineOfDevelopment; CVS_DIRECTORY is a CVSDirectory."""
-
- raise NotImplementedError()
-
- def add_path(self, s_item):
- """Add the path corresponding to S_ITEM to the repository.
-
- S_ITEM is an SVNCommitItem."""
-
- raise NotImplementedError()
-
- def change_path(self, s_item):
- """Change the path corresponding to S_ITEM in the repository.
-
- S_ITEM is an SVNCommitItem."""
-
- raise NotImplementedError()
-
- def delete_lod(self, lod):
- """Delete LOD from the repository.
-
- LOD is a LineOfDevelopment instance."""
-
- raise NotImplementedError()
-
- def delete_path(self, lod, cvs_path):
- """Delete CVS_PATH from LOD.
-
- LOD is a LineOfDevelopment; CVS_PATH is a CVSPath."""
-
- raise NotImplementedError()
-
- def copy_lod(self, src_lod, dest_lod, src_revnum):
- """Copy SRC_LOD in SRC_REVNUM to DEST_LOD.
-
- SRC_LOD and DEST_LOD are both LODs, and SRC_REVNUM is a subversion
- revision number (int)."""
-
- raise NotImplementedError()
-
- def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum):
- """Copy CVS_PATH in SRC_LOD@SRC_REVNUM to DEST_LOD.
-
- CVS_PATH is a CVSPath, SRC_LOD and DEST_LOD are LODs, and
- SRC_REVNUM is a subversion revision number (int)."""
-
- raise NotImplementedError()
-
- def finish(self):
- """All SVN revisions have been committed.
-
- Perform any necessary cleanup."""
-
- raise NotImplementedError()
-
-
diff --git a/cvs2svn_lib/svn_revision_range.py b/cvs2svn_lib/svn_revision_range.py
deleted file mode 100644
index 04ba7fa..0000000
--- a/cvs2svn_lib/svn_revision_range.py
+++ /dev/null
@@ -1,171 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains the SVNRevisionRange class."""
-
-
-import bisect
-
-from cvs2svn_lib.common import SVN_INVALID_REVNUM
-
-
-class SVNRevisionRange:
- """The range of subversion revision numbers from which a path can be
- copied. self.opening_revnum is the number of the earliest such
- revision, and self.closing_revnum is one higher than the number of
- the last such revision. If self.closing_revnum is None, then no
- closings were registered."""
-
- def __init__(self, source_lod, opening_revnum):
- self.source_lod = source_lod
- self.opening_revnum = opening_revnum
- self.closing_revnum = None
-
- def add_closing(self, closing_revnum):
- # When we have a non-trunk default branch, we may have multiple
- # closings--only register the first closing we encounter.
- if self.closing_revnum is None:
- self.closing_revnum = closing_revnum
-
- def __contains__(self, revnum):
- """Return True iff REVNUM is contained in the range."""
-
- return (
- self.opening_revnum <= revnum \
- and (self.closing_revnum is None or revnum < self.closing_revnum)
- )
-
- def __str__(self):
- if self.closing_revnum is None:
- return '[%d:]' % (self.opening_revnum,)
- else:
- return '[%d:%d]' % (self.opening_revnum, self.closing_revnum,)
-
- def __repr__(self):
- return str(self)
-
-
-class RevisionScores:
- """Represent the scores for a range of revisions."""
-
- def __init__(self, svn_revision_ranges):
- """Initialize based on SVN_REVISION_RANGES.
-
- SVN_REVISION_RANGES is a list of SVNRevisionRange objects.
-
- The score of an svn source is defined to be the number of
- SVNRevisionRanges on that LOD that include the revision. A score
- thus indicates that copying the corresponding revision (or any
- following revision up to the next revision in the list) of the
- object in question would yield that many correct paths at or
- underneath the object. There may be other paths underneath it
- that are not correct and would need to be deleted or recopied;
- those can only be detected by descending and examining their
- scores.
-
- If SVN_REVISION_RANGES is empty, then all scores are undefined."""
-
- deltas_map = {}
-
- for range in svn_revision_ranges:
- source_lod = range.source_lod
- try:
- deltas = deltas_map[source_lod]
- except:
- deltas = []
- deltas_map[source_lod] = deltas
- deltas.append((range.opening_revnum, +1))
- if range.closing_revnum is not None:
- deltas.append((range.closing_revnum, -1))
-
- # A map:
- #
- # {SOURCE_LOD : [(REV1 SCORE1), (REV2 SCORE2), (REV3 SCORE3), ...]}
- #
- # where the tuples are sorted by revision number and the revision
- # numbers are distinct. Score is the number of correct paths that
- # would result from using the specified SOURCE_LOD and revision
- # number (or any other revision preceding the next revision
- # listed) as a source. For example, the score of any revision REV
- # in the range REV2 <= REV < REV3 is equal to SCORE2.
- self._scores_map = {}
-
- for (source_lod,deltas) in deltas_map.items():
- # Sort by revision number:
- deltas.sort()
-
- # Initialize output list with zeroth element of deltas. This
- # element must exist, because it was verified that
- # svn_revision_ranges (and therefore openings) is not empty.
- scores = [ deltas[0] ]
- total = deltas[0][1]
- for (rev, change) in deltas[1:]:
- total += change
- if rev == scores[-1][0]:
- # Same revision as last entry; modify last entry:
- scores[-1] = (rev, total)
- else:
- # Previously-unseen revision; create new entry:
- scores.append((rev, total))
- self._scores_map[source_lod] = scores
-
- def get_score(self, range):
- """Return the score for RANGE's opening revision.
-
- If RANGE doesn't appear explicitly in self.scores, use the score
- of the higest revision preceding RANGE. If there are no preceding
- revisions, then the score for RANGE is unknown; in this case,
- return -1."""
-
- try:
- scores = self._scores_map[range.source_lod]
- except KeyError:
- return -1
-
- # Remember, according to the tuple sorting rules,
- #
- # (revnum, anything,) < (revnum+1,) < (revnum+1, anything,)
- predecessor_index = bisect.bisect_right(
- scores, (range.opening_revnum + 1,)
- ) - 1
-
- if predecessor_index < 0:
- return -1
-
- return scores[predecessor_index][1]
-
- def get_best_revnum(self):
- """Find the revnum with the highest score.
-
- Return (revnum, score) for the revnum with the highest score. If
- the highest score is shared by multiple revisions, select the
- oldest revision."""
-
- best_source_lod = None
- best_revnum = SVN_INVALID_REVNUM
- best_score = 0
-
- source_lods = self._scores_map.keys()
- source_lods.sort()
- for source_lod in source_lods:
- for revnum, score in self._scores_map[source_lod]:
- if score > best_score:
- best_source_lod = source_lod
- best_score = score
- best_revnum = revnum
- return best_source_lod, best_revnum, best_score
-
-
diff --git a/cvs2svn_lib/svn_run_options.py b/cvs2svn_lib/svn_run_options.py
deleted file mode 100644
index e757730..0000000
--- a/cvs2svn_lib/svn_run_options.py
+++ /dev/null
@@ -1,543 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module manages cvs2svn run options."""
-
-
-import sys
-import optparse
-import datetime
-import codecs
-
-from cvs2svn_lib.version import VERSION
-from cvs2svn_lib import config
-from cvs2svn_lib.common import warning_prefix
-from cvs2svn_lib.common import error_prefix
-from cvs2svn_lib.common import FatalError
-from cvs2svn_lib.common import normalize_svn_path
-from cvs2svn_lib.log import Log
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.run_options import not_both
-from cvs2svn_lib.run_options import RunOptions
-from cvs2svn_lib.run_options import ContextOption
-from cvs2svn_lib.run_options import IncompatibleOption
-from cvs2svn_lib.run_options import authors
-from cvs2svn_lib.man_writer import ManWriter
-from cvs2svn_lib.project import Project
-from cvs2svn_lib.svn_output_option import DumpfileOutputOption
-from cvs2svn_lib.svn_output_option import ExistingRepositoryOutputOption
-from cvs2svn_lib.svn_output_option import NewRepositoryOutputOption
-from cvs2svn_lib.revision_manager import NullRevisionRecorder
-from cvs2svn_lib.revision_manager import NullRevisionExcluder
-from cvs2svn_lib.rcs_revision_manager import RCSRevisionReader
-from cvs2svn_lib.cvs_revision_manager import CVSRevisionReader
-from cvs2svn_lib.checkout_internal import InternalRevisionRecorder
-from cvs2svn_lib.checkout_internal import InternalRevisionExcluder
-from cvs2svn_lib.checkout_internal import InternalRevisionReader
-from cvs2svn_lib.symbol_strategy import TrunkPathRule
-from cvs2svn_lib.symbol_strategy import BranchesPathRule
-from cvs2svn_lib.symbol_strategy import TagsPathRule
-
-
-short_desc = 'convert a cvs repository into a subversion repository'
-
-synopsis = """\
-.B cvs2svn
-[\\fIOPTION\\fR]... \\fIOUTPUT-OPTION CVS-REPOS-PATH\\fR
-.br
-.B cvs2svn
-[\\fIOPTION\\fR]... \\fI--options=PATH\\fR
-"""
-
-long_desc = """\
-Create a new Subversion repository based on the version history stored in a
-CVS repository. Each CVS commit will be mirrored in the Subversion
-repository, including such information as date of commit and id of the
-committer.
-.P
-\\fICVS-REPOS-PATH\\fR is the filesystem path of the part of the CVS
-repository that you want to convert. It is not possible to convert a
-CVS repository to which you only have remote access; see the FAQ for
-more information. This path doesn't have to be the top level
-directory of a CVS repository; it can point at a project within a
-repository, in which case only that project will be converted. This
-path or one of its parent directories has to contain a subdirectory
-called CVSROOT (though the CVSROOT directory can be empty).
-.P
-Multiple CVS repositories can be converted into a single Subversion
-repository in a single run of cvs2svn, but only by using an
-\\fB--options\\fR file.
-"""
-
-files = """\
-A directory called \\fIcvs2svn-tmp\\fR (or the directory specified by
-\\fB--tmpdir\\fR) is used as scratch space for temporary data files.
-"""
-
-see_also = [
- ('cvs', '1'),
- ('svn', '1'),
- ('svnadmin', '1'),
- ]
-
-
-class SVNRunOptions(RunOptions):
- def _get_output_options_group(self):
- group = RunOptions._get_output_options_group(self)
-
- group.add_option(IncompatibleOption(
- '--svnrepos', '-s', type='string',
- action='store',
- help='path where SVN repos should be created',
- man_help=(
- 'Write the output of the conversion into a Subversion repository '
- 'located at \\fIpath\\fR. This option causes a new Subversion '
- 'repository to be created at \\fIpath\\fR unless the '
- '\\fB--existing-svnrepos\\fR option is also used.'
- ),
- metavar='PATH',
- ))
- self.parser.set_default('existing_svnrepos', False)
- group.add_option(IncompatibleOption(
- '--existing-svnrepos',
- action='store_true',
- help='load into existing SVN repository (for use with --svnrepos)',
- man_help=(
- 'Load the converted CVS repository into an existing Subversion '
- 'repository, instead of creating a new repository. (This option '
- 'should be used in combination with '
- '\\fB-s\\fR/\\fB--svnrepos\\fR.) The repository must either be '
- 'empty or contain no paths that overlap with those that will '
- 'result from the conversion. Please note that you need write '
- 'permission for the repository files.'
- ),
- ))
- group.add_option(IncompatibleOption(
- '--fs-type', type='string',
- action='store',
- help=(
- 'pass --fs-type=TYPE to "svnadmin create" (for use with '
- '--svnrepos)'
- ),
- man_help=(
- 'Pass \\fI--fs-type\\fR=\\fItype\\fR to "svnadmin create" when '
- 'creating a new repository.'
- ),
- metavar='TYPE',
- ))
- self.parser.set_default('bdb_txn_nosync', False)
- group.add_option(IncompatibleOption(
- '--bdb-txn-nosync',
- action='store_true',
- help=(
- 'pass --bdb-txn-nosync to "svnadmin create" (for use with '
- '--svnrepos)'
- ),
- man_help=(
- 'Pass \\fI--bdb-txn-nosync\\fR to "svnadmin create" when '
- 'creating a new BDB-style Subversion repository.'
- ),
- ))
- self.parser.set_default('create_options', [])
- group.add_option(IncompatibleOption(
- '--create-option', type='string',
- action='append', dest='create_options',
- help='pass OPT to "svnadmin create" (for use with --svnrepos)',
- man_help=(
- 'Pass \\fIopt\\fR to "svnadmin create" when creating a new '
- 'Subversion repository (can be specified multiple times to '
- 'pass multiple options).'
- ),
- metavar='OPT',
- ))
- group.add_option(IncompatibleOption(
- '--dumpfile', type='string',
- action='store',
- help='just produce a dumpfile; don\'t commit to a repos',
- man_help=(
- 'Just produce a dumpfile; don\'t commit to an SVN repository. '
- 'Write the dumpfile to \\fIpath\\fR.'
- ),
- metavar='PATH',
- ))
-
- group.add_option(ContextOption(
- '--dry-run',
- action='store_true',
- help=(
- 'do not create a repository or a dumpfile; just print what '
- 'would happen.'
- ),
- man_help=(
- 'Do not create a repository or a dumpfile; just print the '
- 'details of what cvs2svn would do if it were really converting '
- 'your repository.'
- ),
- ))
-
- # Deprecated options:
- self.parser.set_default('dump_only', False)
- group.add_option(IncompatibleOption(
- '--dump-only',
- action='callback', callback=self.callback_dump_only,
- help=optparse.SUPPRESS_HELP,
- man_help=optparse.SUPPRESS_HELP,
- ))
- group.add_option(IncompatibleOption(
- '--create',
- action='callback', callback=self.callback_create,
- help=optparse.SUPPRESS_HELP,
- man_help=optparse.SUPPRESS_HELP,
- ))
-
- return group
-
- def _get_conversion_options_group(self):
- group = RunOptions._get_conversion_options_group(self)
-
- self.parser.set_default('trunk_base', config.DEFAULT_TRUNK_BASE)
- group.add_option(IncompatibleOption(
- '--trunk', type='string',
- action='store', dest='trunk_base',
- help=(
- 'path for trunk (default: %s)'
- % (config.DEFAULT_TRUNK_BASE,)
- ),
- man_help=(
- 'Set the top-level path to use for trunk in the Subversion '
- 'repository. The default is \\fI%s\\fR.'
- % (config.DEFAULT_TRUNK_BASE,)
- ),
- metavar='PATH',
- ))
- self.parser.set_default('branches_base', config.DEFAULT_BRANCHES_BASE)
- group.add_option(IncompatibleOption(
- '--branches', type='string',
- action='store', dest='branches_base',
- help=(
- 'path for branches (default: %s)'
- % (config.DEFAULT_BRANCHES_BASE,)
- ),
- man_help=(
- 'Set the top-level path to use for branches in the Subversion '
- 'repository. The default is \\fI%s\\fR.'
- % (config.DEFAULT_BRANCHES_BASE,)
- ),
- metavar='PATH',
- ))
- self.parser.set_default('tags_base', config.DEFAULT_TAGS_BASE)
- group.add_option(IncompatibleOption(
- '--tags', type='string',
- action='store', dest='tags_base',
- help=(
- 'path for tags (default: %s)'
- % (config.DEFAULT_TAGS_BASE,)
- ),
- man_help=(
- 'Set the top-level path to use for tags in the Subversion '
- 'repository. The default is \\fI%s\\fR.'
- % (config.DEFAULT_TAGS_BASE,)
- ),
- metavar='PATH',
- ))
- group.add_option(ContextOption(
- '--no-prune',
- action='store_false', dest='prune',
- help='don\'t prune empty directories',
- man_help=(
- 'When all files are deleted from a directory in the Subversion '
- 'repository, don\'t delete the empty directory (the default is '
- 'to delete any empty directories).'
- ),
- ))
- group.add_option(ContextOption(
- '--no-cross-branch-commits',
- action='store_false', dest='cross_branch_commits',
- help='prevent the creation of cross-branch commits',
- man_help=(
- 'Prevent the creation of commits that affect files on multiple '
- 'branches at once.'
- ),
- ))
-
- return group
-
- def _get_extraction_options_group(self):
- group = RunOptions._get_extraction_options_group(self)
-
- self.parser.set_default('use_internal_co', False)
- group.add_option(IncompatibleOption(
- '--use-internal-co',
- action='store_true',
- help=(
- 'use internal code to extract revision contents '
- '(fastest but disk space intensive) (default)'
- ),
- man_help=(
- 'Use internal code to extract revision contents. This '
- 'is up to 50% faster than using \\fB--use-rcs\\fR, but needs '
- 'a lot of disk space: roughly the size of your CVS repository '
- 'plus the peak size of a complete checkout of the repository '
- 'with all branches that existed and still had commits pending '
- 'at a given time. This option is the default.'
- ),
- ))
- self.parser.set_default('use_cvs', False)
- group.add_option(IncompatibleOption(
- '--use-cvs',
- action='store_true',
- help=(
- 'use CVS to extract revision contents (slower than '
- '--use-internal-co or --use-rcs)'
- ),
- man_help=(
- 'Use CVS to extract revision contents. This option is slower '
- 'than \\fB--use-internal-co\\fR or \\fB--use-rcs\\fR.'
- ),
- ))
- self.parser.set_default('use_rcs', False)
- group.add_option(IncompatibleOption(
- '--use-rcs',
- action='store_true',
- help=(
- 'use RCS to extract revision contents (faster than '
- '--use-cvs but fails in some cases)'
- ),
- man_help=(
- 'Use RCS \'co\' to extract revision contents. This option is '
- 'faster than \\fB--use-cvs\\fR but fails in some cases.'
- ),
- ))
-
- return group
-
- def _get_environment_options_group(self):
- group = RunOptions._get_environment_options_group(self)
-
- group.add_option(ContextOption(
- '--svnadmin', type='string',
- action='store', dest='svnadmin_executable',
- help='path to the "svnadmin" program',
- man_help=(
- 'Path to the \\fIsvnadmin\\fR program. (\\fIsvnadmin\\fR is '
- 'needed when the \\fB-s\\fR/\\fB--svnrepos\\fR output option is '
- 'used.)'
- ),
- metavar='PATH',
- ))
-
- return group
-
- def callback_dump_only(self, option, opt_str, value, parser):
- parser.values.dump_only = True
- Log().error(
- warning_prefix +
- ': The --dump-only option is deprecated (it is implied '
- 'by --dumpfile).\n'
- )
-
- def callback_create(self, option, opt_str, value, parser):
- Log().error(
- warning_prefix +
- ': The behaviour produced by the --create option is now the '
- 'default;\n'
- 'passing the option is deprecated.\n'
- )
-
- def callback_manpage(self, option, opt_str, value, parser):
- f = codecs.getwriter('utf_8')(sys.stdout)
- ManWriter(
- parser,
- section='1',
- date=datetime.date.today(),
- source='Version %s' % (VERSION,),
- manual='User Commands',
- short_desc=short_desc,
- synopsis=synopsis,
- long_desc=long_desc,
- files=files,
- authors=authors,
- see_also=see_also,
- ).write_manpage(f)
- sys.exit(0)
-
- def process_extraction_options(self):
- """Process options related to extracting data from the CVS repository."""
-
- ctx = Ctx()
- options = self.options
-
- not_both(options.use_rcs, '--use-rcs',
- options.use_cvs, '--use-cvs')
-
- not_both(options.use_rcs, '--use-rcs',
- options.use_internal_co, '--use-internal-co')
-
- not_both(options.use_cvs, '--use-cvs',
- options.use_internal_co, '--use-internal-co')
-
- if options.use_rcs:
- ctx.revision_recorder = NullRevisionRecorder()
- ctx.revision_excluder = NullRevisionExcluder()
- ctx.revision_reader = RCSRevisionReader(options.co_executable)
- elif options.use_cvs:
- ctx.revision_recorder = NullRevisionRecorder()
- ctx.revision_excluder = NullRevisionExcluder()
- ctx.revision_reader = CVSRevisionReader(options.cvs_executable)
- else:
- # --use-internal-co is the default:
- ctx.revision_recorder = InternalRevisionRecorder(compress=True)
- ctx.revision_excluder = InternalRevisionExcluder()
- ctx.revision_reader = InternalRevisionReader(compress=True)
-
- def process_output_options(self):
- """Process the options related to SVN output."""
-
- ctx = Ctx()
- options = self.options
-
- if options.dump_only and not options.dumpfile:
- raise FatalError("'--dump-only' requires '--dumpfile' to be specified.")
-
- if not options.svnrepos and not options.dumpfile and not ctx.dry_run:
- raise FatalError("must pass one of '-s' or '--dumpfile'.")
-
- not_both(options.svnrepos, '-s',
- options.dumpfile, '--dumpfile')
-
- not_both(options.dumpfile, '--dumpfile',
- options.existing_svnrepos, '--existing-svnrepos')
-
- not_both(options.bdb_txn_nosync, '--bdb-txn-nosync',
- options.existing_svnrepos, '--existing-svnrepos')
-
- not_both(options.dumpfile, '--dumpfile',
- options.bdb_txn_nosync, '--bdb-txn-nosync')
-
- not_both(options.fs_type, '--fs-type',
- options.existing_svnrepos, '--existing-svnrepos')
-
- if (
- options.fs_type
- and options.fs_type != 'bdb'
- and options.bdb_txn_nosync
- ):
- raise FatalError("cannot pass --bdb-txn-nosync with --fs-type=%s."
- % options.fs_type)
-
- if options.svnrepos:
- if options.existing_svnrepos:
- ctx.output_option = ExistingRepositoryOutputOption(options.svnrepos)
- else:
- ctx.output_option = NewRepositoryOutputOption(
- options.svnrepos,
- fs_type=options.fs_type, bdb_txn_nosync=options.bdb_txn_nosync,
- create_options=options.create_options)
- else:
- ctx.output_option = DumpfileOutputOption(options.dumpfile)
-
- def add_project(
- self,
- project_cvs_repos_path,
- trunk_path=None, branches_path=None, tags_path=None,
- initial_directories=[],
- symbol_transforms=None,
- symbol_strategy_rules=[],
- ):
- """Add a project to be converted.
-
- Most arguments are passed straight through to the Project
- constructor. SYMBOL_STRATEGY_RULES is an iterable of
- SymbolStrategyRules that will be applied to symbols in this
- project."""
-
- if trunk_path is not None:
- trunk_path = normalize_svn_path(trunk_path, allow_empty=True)
- if branches_path is not None:
- branches_path = normalize_svn_path(branches_path, allow_empty=False)
- if tags_path is not None:
- tags_path = normalize_svn_path(tags_path, allow_empty=False)
-
- initial_directories = [
- path
- for path in [trunk_path, branches_path, tags_path]
- if path
- ] + [
- normalize_svn_path(path)
- for path in initial_directories
- ]
-
- symbol_strategy_rules = list(symbol_strategy_rules)
-
- # Add rules to set the SVN paths for LODs depending on whether
- # they are the trunk, tags, or branches:
- if trunk_path is not None:
- symbol_strategy_rules.append(TrunkPathRule(trunk_path))
- if branches_path is not None:
- symbol_strategy_rules.append(BranchesPathRule(branches_path))
- if tags_path is not None:
- symbol_strategy_rules.append(TagsPathRule(tags_path))
-
- id = len(self.projects)
- project = Project(
- id,
- project_cvs_repos_path,
- initial_directories=initial_directories,
- symbol_transforms=symbol_transforms,
- )
-
- self.projects.append(project)
- self.project_symbol_strategy_rules.append(symbol_strategy_rules)
-
- def clear_projects(self):
- """Clear the list of projects to be converted.
-
- This method is for the convenience of options files, which may
- want to import one another."""
-
- del self.projects[:]
- del self.project_symbol_strategy_rules[:]
-
- def process_options(self):
- # Consistency check for options and arguments.
- if len(self.args) == 0:
- self.usage()
- sys.exit(1)
-
- if len(self.args) > 1:
- Log().error(error_prefix + ": must pass only one CVS repository.\n")
- self.usage()
- sys.exit(1)
-
- cvsroot = self.args[0]
-
- self.process_extraction_options()
- self.process_output_options()
- self.process_symbol_strategy_options()
- self.process_property_setter_options()
-
- # Create the default project (using ctx.trunk, ctx.branches, and
- # ctx.tags):
- self.add_project(
- cvsroot,
- trunk_path=self.options.trunk_base,
- branches_path=self.options.branches_base,
- tags_path=self.options.tags_base,
- symbol_transforms=self.options.symbol_transforms,
- symbol_strategy_rules=self.options.symbol_strategy_rules,
- )
-
-
diff --git a/cvs2svn_lib/symbol.py b/cvs2svn_lib/symbol.py
deleted file mode 100644
index e3a6b35..0000000
--- a/cvs2svn_lib/symbol.py
+++ /dev/null
@@ -1,246 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains classes that represent trunk, branches, and tags.
-
-The classes in this module represent several concepts related to
-symbols and lines of development in the abstract; that is, not within
-a particular file, but across all files in a project.
-
-The classes in this module are organized into the following class
-hierarchy:
-
-AbstractSymbol
- |
- +--LineOfDevelopment
- | |
- | +--Trunk
- | |
- | +--IncludedSymbol (also inherits from TypedSymbol)
- | |
- | +--Branch
- | |
- | +--Tag
- |
- +--Symbol
- |
- +--TypedSymbol
- |
- +--IncludedSymbol (also inherits from LineOfDevelopment)
- | |
- | +--Branch
- | |
- | +--Tag
- |
- +--ExcludedSymbol
-
-Please note the use of multiple inheritance.
-
-All AbstractSymbols contain an id that is globally unique across all
-AbstractSymbols. Moreover, the id of an AbstractSymbol remains the
-same even if the symbol is mutated (as described below), and two
-AbstractSymbols are considered equal iff their ids are the same, even
-if the two instances have different types. Symbols in different
-projects always have different ids and are therefore always distinct.
-(Indeed, this is pretty much the defining characteristic of a
-project.) Even if, for example, two projects each have branches with
-the same name, the Symbols representing the branches are distinct and
-have distinct ids. (This is important to avoid having to rewrite
-databases with new symbol ids in CollateSymbolsPass.)
-
-AbstractSymbols are all initially created in CollectRevsPass as either
-Trunk or Symbol instances. A Symbol instance is essentially an
-undifferentiated Symbol.
-
-In CollateSymbolsPass, it is decided which symbols will be converted
-as branches, which as tags, and which excluded altogether. At the
-beginning of this pass, the symbols are all represented by instances
-of the non-specific Symbol class. During CollateSymbolsPass, each
-Symbol instance is replaced by an instance of Branch, Tag, or
-ExcludedSymbol with the same id. (Trunk instances are left
-unchanged.) At the end of CollateSymbolsPass, all ExcludedSymbols are
-discarded and processing continues with only Trunk, Branch, and Tag
-instances. These three classes inherit from LineOfDevelopment;
-therefore, in later passes the term LineOfDevelopment (abbreviated to
-LOD) is used to refer to such objects."""
-
-
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.common import path_join
-
-
-class AbstractSymbol:
- """Base class for all other classes in this file."""
-
- def __init__(self, id, project):
- self.id = id
- self.project = project
-
- def __hash__(self):
- return self.id
-
- def __eq__(self, other):
- return self.id == other.id
-
-
-class LineOfDevelopment(AbstractSymbol):
- """Base class for Trunk, Branch, and Tag.
-
- This is basically the abstraction for what will be a root tree in
- the Subversion repository."""
-
- def __init__(self, id, project):
- AbstractSymbol.__init__(self, id, project)
- self.base_path = None
-
- def get_path(self, *components):
- """Return the svn path for this LineOfDevelopment."""
-
- return path_join(self.base_path, *components)
-
-
-class Trunk(LineOfDevelopment):
- """Represent the main line of development."""
-
- def __getstate__(self):
- return (self.id, self.project.id, self.base_path,)
-
- def __setstate__(self, state):
- (self.id, project_id, self.base_path,) = state
- self.project = Ctx()._projects[project_id]
-
- def __cmp__(self, other):
- if isinstance(other, Trunk):
- return cmp(self.project, other.project)
- elif isinstance(other, Symbol):
- # Allow Trunk to compare less than Symbols:
- return -1
- else:
- raise NotImplementedError()
-
- def __str__(self):
- """For convenience only. The format is subject to change at any time."""
-
- return 'Trunk'
-
- def __repr__(self):
- return '%s<%x>' % (self, self.id,)
-
-
-class Symbol(AbstractSymbol):
- """Represents a symbol within one project in the CVS repository.
-
- Instance of the Symbol class itself are used to represent symbols
- from the CVS repository. CVS, of course, distinguishes between
- normal tags and branch tags, but we allow symbol types to be changed
- in CollateSymbolsPass. Therefore, we store all CVS symbols as
- Symbol instances at the beginning of the conversion.
-
- In CollateSymbolsPass, Symbols are replaced by Branches, Tags, and
- ExcludedSymbols (the latter being discarded at the end of that
- pass)."""
-
- def __init__(self, id, project, name, preferred_parent_id=None):
- AbstractSymbol.__init__(self, id, project)
- self.name = name
-
- # If this symbol has a preferred parent, this member is the id of
- # the LineOfDevelopment instance representing it. If the symbol
- # never appeared in a CVSTag or CVSBranch (for example, because
- # all of the branches on this LOD have been detached from the
- # dependency tree), then this field is set to None. This field is
- # set during FilterSymbolsPass.
- self.preferred_parent_id = preferred_parent_id
-
- def __getstate__(self):
- return (self.id, self.project.id, self.name, self.preferred_parent_id,)
-
- def __setstate__(self, state):
- (self.id, project_id, self.name, self.preferred_parent_id,) = state
- self.project = Ctx()._projects[project_id]
-
- def __cmp__(self, other):
- if isinstance(other, Symbol):
- return cmp(self.project, other.project) \
- or cmp(self.name, other.name) \
- or cmp(self.id, other.id)
- elif isinstance(other, Trunk):
- # Allow Symbols to compare greater than Trunk:
- return +1
- else:
- raise NotImplementedError()
-
- def __str__(self):
- return self.name
-
- def __repr__(self):
- return '%s<%x>' % (self, self.id,)
-
-
-class TypedSymbol(Symbol):
- """A Symbol whose type (branch, tag, or excluded) has been decided."""
-
- def __init__(self, symbol):
- Symbol.__init__(
- self, symbol.id, symbol.project, symbol.name,
- symbol.preferred_parent_id,
- )
-
-
-class IncludedSymbol(TypedSymbol, LineOfDevelopment):
- """A TypedSymbol that will be included in the conversion."""
-
- def __init__(self, symbol):
- TypedSymbol.__init__(self, symbol)
- # We can't call the LineOfDevelopment constructor, so initialize
- # its extra member explicitly:
- try:
- # If the old symbol had a base_path set, then use it:
- self.base_path = symbol.base_path
- except AttributeError:
- self.base_path = None
-
- def __getstate__(self):
- return (TypedSymbol.__getstate__(self), self.base_path,)
-
- def __setstate__(self, state):
- (super_state, self.base_path,) = state
- TypedSymbol.__setstate__(self, super_state)
-
-
-class Branch(IncludedSymbol):
- """An object that describes a CVS branch."""
-
- def __str__(self):
- """For convenience only. The format is subject to change at any time."""
-
- return 'Branch(%r)' % (self.name,)
-
-
-class Tag(IncludedSymbol):
- def __str__(self):
- """For convenience only. The format is subject to change at any time."""
-
- return 'Tag(%r)' % (self.name,)
-
-
-class ExcludedSymbol(TypedSymbol):
- def __str__(self):
- """For convenience only. The format is subject to change at any time."""
-
- return 'ExcludedSymbol(%r)' % (self.name,)
-
-
diff --git a/cvs2svn_lib/symbol_database.py b/cvs2svn_lib/symbol_database.py
deleted file mode 100644
index 824f97b..0000000
--- a/cvs2svn_lib/symbol_database.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains the SymbolDatabase class."""
-
-
-import cPickle
-
-from cvs2svn_lib import config
-from cvs2svn_lib.artifact_manager import artifact_manager
-
-
-class SymbolDatabase:
- """Read-only access to symbol database.
-
- This class allows iteration and lookups id -> symbol, where symbol
- is a TypedSymbol instance. The whole database is read into memory
- upon construction."""
-
- def __init__(self):
- # A map { id : TypedSymbol }
- self._symbols = {}
-
- f = open(artifact_manager.get_temp_file(config.SYMBOL_DB), 'rb')
- symbols = cPickle.load(f)
- f.close()
- for symbol in symbols:
- self._symbols[symbol.id] = symbol
-
- def get_symbol(self, id):
- """Return the symbol instance with id ID.
-
- Raise KeyError if the symbol is not known."""
-
- return self._symbols[id]
-
- def __iter__(self):
- """Iterate over the Symbol instances within this database."""
-
- return self._symbols.itervalues()
-
- def close(self):
- self._symbols = None
-
-
-def create_symbol_database(symbols):
- """Create and fill a symbol database.
-
- Record each symbol that is listed in SYMBOLS, which is an iterable
- containing Trunk and TypedSymbol objects."""
-
- f = open(artifact_manager.get_temp_file(config.SYMBOL_DB), 'wb')
- cPickle.dump(symbols, f, -1)
- f.close()
-
diff --git a/cvs2svn_lib/symbol_statistics.py b/cvs2svn_lib/symbol_statistics.py
deleted file mode 100644
index 0d35a50..0000000
--- a/cvs2svn_lib/symbol_statistics.py
+++ /dev/null
@@ -1,521 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module gathers and processes statistics about lines of development."""
-
-import cPickle
-
-from cvs2svn_lib import config
-from cvs2svn_lib.common import error_prefix
-from cvs2svn_lib.common import FatalException
-from cvs2svn_lib.log import Log
-from cvs2svn_lib.artifact_manager import artifact_manager
-from cvs2svn_lib.symbol import Trunk
-from cvs2svn_lib.symbol import IncludedSymbol
-from cvs2svn_lib.symbol import Branch
-from cvs2svn_lib.symbol import Tag
-from cvs2svn_lib.symbol import ExcludedSymbol
-
-
-class SymbolPlanError(FatalException):
- pass
-
-
-class SymbolPlanException(SymbolPlanError):
- def __init__(self, stats, symbol, msg):
- self.stats = stats
- self.symbol = symbol
- SymbolPlanError.__init__(
- self,
- 'Cannot convert the following symbol to %s: %s\n %s'
- % (symbol, msg, self.stats,)
- )
-
-
-class IndeterminateSymbolException(SymbolPlanException):
- def __init__(self, stats, symbol):
- SymbolPlanException.__init__(self, stats, symbol, 'Indeterminate type')
-
-
-class _Stats:
- """A summary of information about a symbol (tag or branch).
-
- Members:
-
- lod -- the LineOfDevelopment instance of the lod being described
-
- tag_create_count -- the number of files in which this lod appears
- as a tag
-
- branch_create_count -- the number of files in which this lod
- appears as a branch
-
- branch_commit_count -- the number of files in which there were
- commits on this lod
-
- trivial_import_count -- the number of files in which this branch
- was purely a non-trunk default branch containing exactly one
- revision.
-
- pure_ntdb_count -- the number of files in which this branch was
- purely a non-trunk default branch (consisting only of
- non-trunk default branch revisions).
-
- branch_blockers -- a set of Symbol instances for any symbols that
- sprout from a branch with this name.
-
- possible_parents -- a map {LineOfDevelopment : count} indicating
- in how many files each LOD could have served as the parent of
- self.lod."""
-
- def __init__(self, lod):
- self.lod = lod
- self.tag_create_count = 0
- self.branch_create_count = 0
- self.branch_commit_count = 0
- self.branch_blockers = set()
- self.trivial_import_count = 0
- self.pure_ntdb_count = 0
- self.possible_parents = { }
-
- def register_tag_creation(self):
- """Register the creation of this lod as a tag."""
-
- self.tag_create_count += 1
-
- def register_branch_creation(self):
- """Register the creation of this lod as a branch."""
-
- self.branch_create_count += 1
-
- def register_branch_commit(self):
- """Register that there were commit(s) on this branch in one file."""
-
- self.branch_commit_count += 1
-
- def register_branch_blocker(self, blocker):
- """Register BLOCKER as preventing this symbol from being deleted.
-
- BLOCKER is a tag or a branch that springs from a revision on this
- symbol."""
-
- self.branch_blockers.add(blocker)
-
- def register_trivial_import(self):
- """Register that this branch is a trivial import branch in one file."""
-
- self.trivial_import_count += 1
-
- def register_pure_ntdb(self):
- """Register that this branch is a pure import branch in one file."""
-
- self.pure_ntdb_count += 1
-
- def register_possible_parent(self, lod):
- """Register that LOD was a possible parent for SELF.lod in a file."""
-
- self.possible_parents[lod] = self.possible_parents.get(lod, 0) + 1
-
- def register_branch_possible_parents(self, cvs_branch, cvs_file_items):
- """Register any possible parents of this symbol from CVS_BRANCH."""
-
- # This routine is a bottleneck. So we define some local variables
- # to speed up access to frequently-needed variables.
- register = self.register_possible_parent
- parent_cvs_rev = cvs_file_items[cvs_branch.source_id]
-
- # The "obvious" parent of a branch is the branch holding the
- # revision where the branch is rooted:
- register(parent_cvs_rev.lod)
-
- # Any other branches that are rooted at the same revision and
- # were committed earlier than the branch are also possible
- # parents:
- symbol = cvs_branch.symbol
- for branch_id in parent_cvs_rev.branch_ids:
- parent_symbol = cvs_file_items[branch_id].symbol
- # A branch cannot be its own parent, nor can a branch's
- # parent be a branch that was created after it. So we stop
- # iterating when we reached the branch whose parents we are
- # collecting:
- if parent_symbol == symbol:
- break
- register(parent_symbol)
-
- def register_tag_possible_parents(self, cvs_tag, cvs_file_items):
- """Register any possible parents of this symbol from CVS_TAG."""
-
- # This routine is a bottleneck. So use local variables to speed
- # up access to frequently-needed objects.
- register = self.register_possible_parent
- parent_cvs_rev = cvs_file_items[cvs_tag.source_id]
-
- # The "obvious" parent of a tag is the branch holding the
- # revision where the branch is rooted:
- register(parent_cvs_rev.lod)
-
- # Branches that are rooted at the same revision are also
- # possible parents:
- for branch_id in parent_cvs_rev.branch_ids:
- parent_symbol = cvs_file_items[branch_id].symbol
- register(parent_symbol)
-
- def is_ghost(self):
- """Return True iff this lod never really existed."""
-
- return (
- not isinstance(self.lod, Trunk)
- and self.branch_commit_count == 0
- and not self.branch_blockers
- and not self.possible_parents
- )
-
- def check_valid(self, symbol):
- """Check whether SYMBOL is a valid conversion of SELF.lod.
-
- It is planned to convert SELF.lod as SYMBOL. Verify that SYMBOL
- is a TypedSymbol and that the information that it contains is
- consistent with that stored in SELF.lod. (This routine does not
- do higher-level tests of whether the chosen conversion is actually
- sensible.) If there are any problems, raise a
- SymbolPlanException."""
-
- if not isinstance(symbol, (Trunk, Branch, Tag, ExcludedSymbol)):
- raise IndeterminateSymbolException(self, symbol)
-
- if symbol.id != self.lod.id:
- raise SymbolPlanException(self, symbol, 'IDs must match')
-
- if symbol.project != self.lod.project:
- raise SymbolPlanException(self, symbol, 'Projects must match')
-
- if isinstance(symbol, IncludedSymbol) and symbol.name != self.lod.name:
- raise SymbolPlanException(self, symbol, 'Names must match')
-
- def check_preferred_parent_allowed(self, symbol):
- """Check that SYMBOL's preferred_parent_id is an allowed parent.
-
- SYMBOL is the planned conversion of SELF.lod. Verify that its
- preferred_parent_id is a possible parent of SELF.lod. If not,
- raise a SymbolPlanException describing the problem."""
-
- if isinstance(symbol, IncludedSymbol) \
- and symbol.preferred_parent_id is not None:
- for pp in self.possible_parents.keys():
- if pp.id == symbol.preferred_parent_id:
- return
- else:
- raise SymbolPlanException(
- self, symbol,
- 'The selected parent is not among the symbol\'s '
- 'possible parents.'
- )
-
- def __str__(self):
- return (
- '\'%s\' is '
- 'a tag in %d files, '
- 'a branch in %d files, '
- 'a trivial import in %d files, '
- 'a pure import in %d files, '
- 'and has commits in %d files'
- % (self.lod, self.tag_create_count, self.branch_create_count,
- self.trivial_import_count, self.pure_ntdb_count,
- self.branch_commit_count)
- )
-
- def __repr__(self):
- retval = ['%s\n possible parents:\n' % (self,)]
- parent_counts = self.possible_parents.items()
- parent_counts.sort(lambda a,b: - cmp(a[1], b[1]))
- for (symbol, count) in parent_counts:
- if isinstance(symbol, Trunk):
- retval.append(' trunk : %d\n' % count)
- else:
- retval.append(' \'%s\' : %d\n' % (symbol.name, count))
- if self.branch_blockers:
- blockers = list(self.branch_blockers)
- blockers.sort()
- retval.append(' blockers:\n')
- for blocker in blockers:
- retval.append(' \'%s\'\n' % (blocker,))
- return ''.join(retval)
-
-
-class SymbolStatisticsCollector:
- """Collect statistics about lines of development.
-
- Record a summary of information about each line of development in
- the RCS files for later storage into a database. The database is
- created in CollectRevsPass and it is used in CollateSymbolsPass (via
- the SymbolStatistics class).
-
- collect_data._SymbolDataCollector inserts information into instances
- of this class by by calling its register_*() methods.
-
- Its main purpose is to assist in the decisions about which symbols
- can be treated as branches and tags and which may be excluded.
-
- The data collected by this class can be written to the file
- config.SYMBOL_STATISTICS."""
-
- def __init__(self):
- # A map { lod -> _Stats } for all lines of development:
- self._stats = { }
-
- def __getitem__(self, lod):
- """Return the _Stats record for line of development LOD.
-
- Create and register a new one if necessary."""
-
- try:
- return self._stats[lod]
- except KeyError:
- stats = _Stats(lod)
- self._stats[lod] = stats
- return stats
-
- def register(self, cvs_file_items):
- """Register the statistics for each symbol in CVS_FILE_ITEMS."""
-
- for lod_items in cvs_file_items.iter_lods():
- if lod_items.lod is not None:
- branch_stats = self[lod_items.lod]
-
- branch_stats.register_branch_creation()
-
- if lod_items.cvs_revisions:
- branch_stats.register_branch_commit()
-
- if lod_items.is_trivial_import():
- branch_stats.register_trivial_import()
-
- if lod_items.is_pure_ntdb():
- branch_stats.register_pure_ntdb()
-
- for cvs_symbol in lod_items.iter_blockers():
- branch_stats.register_branch_blocker(cvs_symbol.symbol)
-
- if lod_items.cvs_branch is not None:
- branch_stats.register_branch_possible_parents(
- lod_items.cvs_branch, cvs_file_items
- )
-
- for cvs_tag in lod_items.cvs_tags:
- tag_stats = self[cvs_tag.symbol]
-
- tag_stats.register_tag_creation()
-
- tag_stats.register_tag_possible_parents(cvs_tag, cvs_file_items)
-
- def purge_ghost_symbols(self):
- """Purge any symbols that don't have any activity.
-
- Such ghost symbols can arise if a symbol was defined in an RCS
- file but pointed at a non-existent revision."""
-
- for stats in self._stats.values():
- if stats.is_ghost():
- Log().warn('Deleting ghost symbol: %s' % (stats.lod,))
- del self._stats[stats.lod]
-
- def close(self):
- """Store the stats database to the SYMBOL_STATISTICS file."""
-
- f = open(artifact_manager.get_temp_file(config.SYMBOL_STATISTICS), 'wb')
- cPickle.dump(self._stats.values(), f, -1)
- f.close()
- self._stats = None
-
-
-class SymbolStatistics:
- """Read and handle line of development statistics.
-
- The statistics are read from a database created by
- SymbolStatisticsCollector. This class has methods to process the
- statistics information and help with decisions about:
-
- 1. What tags and branches should be processed/excluded
-
- 2. What tags should be forced to be branches and vice versa (this
- class maintains some statistics to help the user decide)
-
- 3. Are there inconsistencies?
-
- - A symbol that is sometimes a branch and sometimes a tag
-
- - A forced branch with commit(s) on it
-
- - A non-excluded branch depends on an excluded branch
-
- The data in this class is read from a pickle file."""
-
- def __init__(self, filename):
- """Read the stats database from FILENAME."""
-
- # A map { LineOfDevelopment -> _Stats } for all lines of
- # development:
- self._stats = { }
-
- # A map { LineOfDevelopment.id -> _Stats } for all lines of
- # development:
- self._stats_by_id = { }
-
- stats_list = cPickle.load(open(filename, 'rb'))
-
- for stats in stats_list:
- self._stats[stats.lod] = stats
- self._stats_by_id[stats.lod.id] = stats
-
- def __len__(self):
- return len(self._stats)
-
- def __getitem__(self, lod_id):
- return self._stats_by_id[lod_id]
-
- def get_stats(self, lod):
- """Return the _Stats object for LineOfDevelopment instance LOD.
-
- Raise KeyError if no such lod exists."""
-
- return self._stats[lod]
-
- def __iter__(self):
- return self._stats.itervalues()
-
- def _check_blocked_excludes(self, symbol_map):
- """Check for any excluded LODs that are blocked by non-excluded symbols.
-
- If any are found, describe the problem to Log().error() and raise
- a FatalException."""
-
- # A list of (lod,[blocker,...]) tuples for excludes that are
- # blocked by the specified non-excluded blockers:
- problems = []
-
- for lod in symbol_map.itervalues():
- if isinstance(lod, ExcludedSymbol):
- # Symbol is excluded; make sure that its blockers are also
- # excluded:
- lod_blockers = []
- for blocker in self.get_stats(lod).branch_blockers:
- if isinstance(symbol_map.get(blocker, None), IncludedSymbol):
- lod_blockers.append(blocker)
- if lod_blockers:
- problems.append((lod, lod_blockers))
-
- if problems:
- s = []
- for (lod, lod_blockers) in problems:
- s.append(
- '%s: %s cannot be excluded because the following symbols '
- 'depend on it:\n'
- % (error_prefix, lod,)
- )
- for blocker in lod_blockers:
- s.append(' %s\n' % (blocker,))
- s.append('\n')
- Log().error(''.join(s))
-
- raise FatalException()
-
- def _check_invalid_tags(self, symbol_map):
- """Check for commits on any symbols that are to be converted as tags.
-
- SYMBOL_MAP is a map {AbstractSymbol : (Trunk|TypedSymbol)}
- indicating how each AbstractSymbol is to be converted. If there
- is a commit on a symbol, then it cannot be converted as a tag. If
- any tags with commits are found, output error messages describing
- the problems then raise a FatalException."""
-
- Log().quiet("Checking for forced tags with commits...")
-
- invalid_tags = [ ]
- for symbol in symbol_map.itervalues():
- if isinstance(symbol, Tag):
- stats = self.get_stats(symbol)
- if stats.branch_commit_count > 0:
- invalid_tags.append(symbol)
-
- if not invalid_tags:
- # No problems found:
- return
-
- s = []
- s.append(
- '%s: The following branches cannot be forced to be tags '
- 'because they have commits:\n'
- % (error_prefix,)
- )
- for tag in invalid_tags:
- s.append(' %s\n' % (tag.name))
- s.append('\n')
- Log().error(''.join(s))
-
- raise FatalException()
-
- def check_consistency(self, symbol_map):
- """Check the plan for how to convert symbols for consistency.
-
- SYMBOL_MAP is a map {AbstractSymbol : (Trunk|TypedSymbol)}
- indicating how each AbstractSymbol is to be converted. If any
- problems are detected, describe the problem to Log().error() and
- raise a FatalException."""
-
- # We want to do all of the consistency checks even if one of them
- # fails, so that the user gets as much feedback as possible. Set
- # this variable to True if any errors are found.
- error_found = False
-
- # Check that the planned preferred parents are OK for all
- # IncludedSymbols:
- for lod in symbol_map.itervalues():
- if isinstance(lod, IncludedSymbol):
- stats = self.get_stats(lod)
- try:
- stats.check_preferred_parent_allowed(lod)
- except SymbolPlanException, e:
- Log().error('%s\n' % (e,))
- error_found = True
-
- try:
- self._check_blocked_excludes(symbol_map)
- except FatalException:
- error_found = True
-
- try:
- self._check_invalid_tags(symbol_map)
- except FatalException:
- error_found = True
-
- if error_found:
- raise FatalException(
- 'Please fix the above errors and restart CollateSymbolsPass'
- )
-
- def exclude_symbol(self, symbol):
- """SYMBOL has been excluded; remove it from our statistics."""
-
- del self._stats[symbol]
- del self._stats_by_id[symbol.id]
-
- # Remove references to this symbol from other statistics objects:
- for stats in self._stats.itervalues():
- stats.branch_blockers.discard(symbol)
- if symbol in stats.possible_parents:
- del stats.possible_parents[symbol]
-
-
diff --git a/cvs2svn_lib/symbol_strategy.py b/cvs2svn_lib/symbol_strategy.py
deleted file mode 100644
index 9d562a8..0000000
--- a/cvs2svn_lib/symbol_strategy.py
+++ /dev/null
@@ -1,685 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2000-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""SymbolStrategy classes determine how to convert symbols."""
-
-import re
-
-from cvs2svn_lib.common import FatalError
-from cvs2svn_lib.common import path_join
-from cvs2svn_lib.common import normalize_svn_path
-from cvs2svn_lib.log import Log
-from cvs2svn_lib.symbol import Trunk
-from cvs2svn_lib.symbol import TypedSymbol
-from cvs2svn_lib.symbol import Branch
-from cvs2svn_lib.symbol import Tag
-from cvs2svn_lib.symbol import ExcludedSymbol
-from cvs2svn_lib.symbol_statistics import SymbolPlanError
-
-
-class StrategyRule:
- """A single rule that might determine how to convert a symbol."""
-
- def start(self, symbol_statistics):
- """This method is called once before get_symbol() is ever called.
-
- The StrategyRule can override this method to do whatever it wants
- to prepare itself for work. SYMBOL_STATISTICS is an instance of
- SymbolStatistics containing the statistics for all symbols in all
- projects."""
-
- pass
-
- def get_symbol(self, symbol, stats):
- """Return an object describing what to do with the symbol in STATS.
-
- SYMBOL holds a Trunk or Symbol object as it has been determined so
- far. Hopefully one of these method calls will turn any naked
- Symbol instances into TypedSymbols.
-
- If this rule applies to the SYMBOL (whose statistics are collected
- in STATS), then return a new or modified AbstractSymbol object.
- If this rule doesn't apply, return SYMBOL unchanged."""
-
- raise NotImplementedError()
-
- def finish(self):
- """This method is called once after get_symbol() is done being called.
-
- The StrategyRule can override this method do whatever it wants to
- release resources, etc."""
-
- pass
-
-
-class _RegexpStrategyRule(StrategyRule):
- """A Strategy rule that bases its decisions on regexp matches.
-
- If self.regexp matches a symbol name, return self.action(symbol);
- otherwise, return the symbol unchanged."""
-
- def __init__(self, pattern, action):
- """Initialize a _RegexpStrategyRule.
-
- PATTERN is a string that will be treated as a regexp pattern.
- PATTERN must match a full symbol name for the rule to apply (i.e.,
- it is anchored at the beginning and end of the symbol name).
-
- ACTION is the class representing how the symbol should be
- converted. It should be one of the classes Branch, Tag, or
- ExcludedSymbol.
-
- If PATTERN matches a symbol name, then get_symbol() returns
- ACTION(name, id); otherwise it returns SYMBOL unchanged."""
-
- try:
- self.regexp = re.compile('^' + pattern + '$')
- except re.error:
- raise FatalError("%r is not a valid regexp." % (pattern,))
-
- self.action = action
-
- def log(self, symbol):
- raise NotImplementedError()
-
- def get_symbol(self, symbol, stats):
- if isinstance(symbol, (Trunk, TypedSymbol)):
- return symbol
- elif self.regexp.match(symbol.name):
- self.log(symbol)
- return self.action(symbol)
- else:
- return symbol
-
-
-class ForceBranchRegexpStrategyRule(_RegexpStrategyRule):
- """Force symbols matching pattern to be branches."""
-
- def __init__(self, pattern):
- _RegexpStrategyRule.__init__(self, pattern, Branch)
-
- def log(self, symbol):
- Log().verbose(
- 'Converting symbol %s as a branch because it matches regexp "%s".'
- % (symbol, self.regexp.pattern,)
- )
-
-
-class ForceTagRegexpStrategyRule(_RegexpStrategyRule):
- """Force symbols matching pattern to be tags."""
-
- def __init__(self, pattern):
- _RegexpStrategyRule.__init__(self, pattern, Tag)
-
- def log(self, symbol):
- Log().verbose(
- 'Converting symbol %s as a tag because it matches regexp "%s".'
- % (symbol, self.regexp.pattern,)
- )
-
-
-class ExcludeRegexpStrategyRule(_RegexpStrategyRule):
- """Exclude symbols matching pattern."""
-
- def __init__(self, pattern):
- _RegexpStrategyRule.__init__(self, pattern, ExcludedSymbol)
-
- def log(self, symbol):
- Log().verbose(
- 'Excluding symbol %s because it matches regexp "%s".'
- % (symbol, self.regexp.pattern,)
- )
-
-
-class ExcludeTrivialImportBranchRule(StrategyRule):
- """If a symbol is a trivial import branch, exclude it.
-
- A trivial import branch is defined to be a branch that only had a
- single import on it (no other kinds of commits) in every file in
- which it appeared. In most cases these branches are worthless."""
-
- def get_symbol(self, symbol, stats):
- if isinstance(symbol, (Trunk, TypedSymbol)):
- return symbol
- if stats.tag_create_count == 0 \
- and stats.branch_create_count == stats.trivial_import_count:
- Log().verbose(
- 'Excluding branch %s because it is a trivial import branch.'
- % (symbol,)
- )
- return ExcludedSymbol(symbol)
- else:
- return symbol
-
-
-class ExcludeVendorBranchRule(StrategyRule):
- """If a symbol is a pure vendor branch, exclude it.
-
- A pure vendor branch is defined to be a branch that only had imports
- on it (no other kinds of commits) in every file in which it
- appeared."""
-
- def get_symbol(self, symbol, stats):
- if isinstance(symbol, (Trunk, TypedSymbol)):
- return symbol
- if stats.tag_create_count == 0 \
- and stats.branch_create_count == stats.pure_ntdb_count:
- Log().verbose(
- 'Excluding branch %s because it is a pure vendor branch.'
- % (symbol,)
- )
- return ExcludedSymbol(symbol)
- else:
- return symbol
-
-
-class UnambiguousUsageRule(StrategyRule):
- """If a symbol is used unambiguously as a tag/branch, convert it as such."""
-
- def get_symbol(self, symbol, stats):
- if isinstance(symbol, (Trunk, TypedSymbol)):
- return symbol
- is_tag = stats.tag_create_count > 0
- is_branch = stats.branch_create_count > 0 or stats.branch_commit_count > 0
- if is_tag and is_branch:
- # Can't decide
- return symbol
- elif is_branch:
- Log().verbose(
- 'Converting symbol %s as a branch because it is always used '
- 'as a branch.'
- % (symbol,)
- )
- return Branch(symbol)
- elif is_tag:
- Log().verbose(
- 'Converting symbol %s as a tag because it is always used '
- 'as a tag.'
- % (symbol,)
- )
- return Tag(symbol)
- else:
- # The symbol didn't appear at all:
- return symbol
-
-
-class BranchIfCommitsRule(StrategyRule):
- """If there was ever a commit on the symbol, convert it as a branch."""
-
- def get_symbol(self, symbol, stats):
- if isinstance(symbol, (Trunk, TypedSymbol)):
- return symbol
- elif stats.branch_commit_count > 0:
- Log().verbose(
- 'Converting symbol %s as a branch because there are commits on it.'
- % (symbol,)
- )
- return Branch(symbol)
- else:
- return symbol
-
-
-class HeuristicStrategyRule(StrategyRule):
- """Convert symbol based on how often it was used as a branch/tag.
-
- Whichever happened more often determines how the symbol is
- converted."""
-
- def get_symbol(self, symbol, stats):
- if isinstance(symbol, (Trunk, TypedSymbol)):
- return symbol
- elif stats.tag_create_count >= stats.branch_create_count:
- Log().verbose(
- 'Converting symbol %s as a tag because it is more often used '
- 'as a tag.'
- % (symbol,)
- )
- return Tag(symbol)
- else:
- Log().verbose(
- 'Converting symbol %s as a branch because it is more often used '
- 'as a branch.'
- % (symbol,)
- )
- return Branch(symbol)
-
-
-class AllBranchRule(StrategyRule):
- """Convert all symbols as branches.
-
- Usually this rule will appear after a list of more careful rules
- (including a general rule like UnambiguousUsageRule) and will
- therefore only apply to the symbols not handled earlier."""
-
- def get_symbol(self, symbol, stats):
- if isinstance(symbol, (Trunk, TypedSymbol)):
- return symbol
- else:
- Log().verbose(
- 'Converting symbol %s as a branch because no other rules applied.'
- % (symbol,)
- )
- return Branch(symbol)
-
-
-class AllTagRule(StrategyRule):
- """Convert all symbols as tags.
-
- We don't worry about conflicts here; they will be caught later by
- SymbolStatistics.check_consistency().
-
- Usually this rule will appear after a list of more careful rules
- (including a general rule like UnambiguousUsageRule) and will
- therefore only apply to the symbols not handled earlier."""
-
- def get_symbol(self, symbol, stats):
- if isinstance(symbol, (Trunk, TypedSymbol)):
- return symbol
- else:
- Log().verbose(
- 'Converting symbol %s as a tag because no other rules applied.'
- % (symbol,)
- )
- return Tag(symbol)
-
-
-class TrunkPathRule(StrategyRule):
- """Set the base path for Trunk."""
-
- def __init__(self, trunk_path):
- self.trunk_path = trunk_path
-
- def get_symbol(self, symbol, stats):
- if isinstance(symbol, Trunk) and symbol.base_path is None:
- symbol.base_path = self.trunk_path
-
- return symbol
-
-
-class SymbolPathRule(StrategyRule):
- """Set the base paths for symbol LODs."""
-
- def __init__(self, symbol_type, base_path):
- self.symbol_type = symbol_type
- self.base_path = base_path
-
- def get_symbol(self, symbol, stats):
- if isinstance(symbol, self.symbol_type) and symbol.base_path is None:
- symbol.base_path = path_join(self.base_path, symbol.name)
-
- return symbol
-
-
-class BranchesPathRule(SymbolPathRule):
- """Set the base paths for Branch LODs."""
-
- def __init__(self, branch_path):
- SymbolPathRule.__init__(self, Branch, branch_path)
-
-
-class TagsPathRule(SymbolPathRule):
- """Set the base paths for Tag LODs."""
-
- def __init__(self, tag_path):
- SymbolPathRule.__init__(self, Tag, tag_path)
-
-
-class HeuristicPreferredParentRule(StrategyRule):
- """Use a heuristic rule to pick preferred parents.
-
- Pick the parent that should be preferred for any TypedSymbols. As
- parent, use the symbol that appeared most often as a possible parent
- of the symbol in question. If multiple symbols are tied, choose the
- one that comes first according to the Symbol class's natural sort
- order."""
-
- def _get_preferred_parent(self, stats):
- """Return the LODs that are most often possible parents in STATS.
-
- Return the set of LinesOfDevelopment that appeared most often as
- possible parents. The return value might contain multiple symbols
- if multiple LinesOfDevelopment appeared the same number of times."""
-
- best_count = -1
- best_symbol = None
- for (symbol, count) in stats.possible_parents.items():
- if count > best_count or (count == best_count and symbol < best_symbol):
- best_count = count
- best_symbol = symbol
-
- if best_symbol is None:
- return None
- else:
- return best_symbol
-
- def get_symbol(self, symbol, stats):
- if isinstance(symbol, TypedSymbol) and symbol.preferred_parent_id is None:
- preferred_parent = self._get_preferred_parent(stats)
- if preferred_parent is None:
- Log().verbose('%s has no preferred parent' % (symbol,))
- else:
- symbol.preferred_parent_id = preferred_parent.id
- Log().verbose(
- 'The preferred parent of %s is %s' % (symbol, preferred_parent,)
- )
-
- return symbol
-
-
-class ManualTrunkRule(StrategyRule):
- """Change the SVN path of Trunk LODs.
-
- Members:
-
- project_id -- (int or None) The id of the project whose trunk
- should be affected by this rule. If project_id is None, then
- the rule is not project-specific.
-
- svn_path -- (str) The SVN path that should be used as the base
- directory for this trunk. This member must not be None,
- though it may be the empty string for a single-project,
- trunk-only conversion.
-
- """
-
- def __init__(self, project_id, svn_path):
- self.project_id = project_id
- self.svn_path = normalize_svn_path(svn_path, allow_empty=True)
-
- def get_symbol(self, symbol, stats):
- if (self.project_id is not None
- and self.project_id != stats.lod.project.id):
- return symbol
-
- if isinstance(symbol, Trunk):
- symbol.base_path = self.svn_path
-
- return symbol
-
-
-def convert_as_branch(symbol):
- Log().verbose(
- 'Converting symbol %s as a branch because of manual setting.'
- % (symbol,)
- )
- return Branch(symbol)
-
-
-def convert_as_tag(symbol):
- Log().verbose(
- 'Converting symbol %s as a tag because of manual setting.'
- % (symbol,)
- )
- return Tag(symbol)
-
-
-def exclude(symbol):
- Log().verbose(
- 'Excluding symbol %s because of manual setting.'
- % (symbol,)
- )
- return ExcludedSymbol(symbol)
-
-
-class ManualSymbolRule(StrategyRule):
- """Change how particular symbols are converted.
-
- Members:
-
- project_id -- (int or None) The id of the project whose trunk
- should be affected by this rule. If project_id is None, then
- the rule is not project-specific.
-
- symbol_name -- (str) The name of the symbol that should be
- affected by this rule.
-
- conversion -- (callable or None) A callable that converts the
- symbol to its preferred output type. This should normally be
- one of (convert_as_branch, convert_as_tag, exclude). If this
- member is None, then this rule does not affect the symbol's
- output type.
-
- svn_path -- (str) The SVN path that should be used as the base
- directory for this trunk. This member must not be None,
- though it may be the empty string for a single-project,
- trunk-only conversion.
-
- parent_lod_name -- (str or None) The name of the line of
- development that should be preferred as the parent of this
- symbol. (The preferred parent is the line of development from
- which the symbol should sprout.) If this member is set to the
- string '.trunk.', then the symbol will be set to sprout
- directly from trunk. If this member is set to None, then this
- rule won't affect the symbol's parent.
-
- """
-
- def __init__(
- self, project_id, symbol_name, conversion, svn_path, parent_lod_name
- ):
- self.project_id = project_id
- self.symbol_name = symbol_name
- self.conversion = conversion
- if svn_path is None:
- self.svn_path = None
- else:
- self.svn_path = normalize_svn_path(svn_path, allow_empty=True)
- self.parent_lod_name = parent_lod_name
-
- def _get_parent_by_id(self, parent_lod_name, stats):
- """Return the LOD object for the parent with name PARENT_LOD_NAME.
-
- STATS is the _Stats object describing a symbol whose parent needs
- to be determined from its name. If none of its possible parents
- has name PARENT_LOD_NAME, raise a SymbolPlanError."""
-
- for pp in stats.possible_parents.keys():
- if isinstance(pp, Trunk):
- pass
- elif pp.name == parent_lod_name:
- return pp
- else:
- parent_counts = stats.possible_parents.items()
- parent_counts.sort(lambda a,b: - cmp(a[1], b[1]))
- lines = [
- '%s is not a valid parent for %s;'
- % (parent_lod_name, stats.lod,),
- ' possible parents (with counts):'
- ]
- for (symbol, count) in parent_counts:
- if isinstance(symbol, Trunk):
- lines.append(' .trunk. : %d' % count)
- else:
- lines.append(' %s : %d' % (symbol.name, count))
- raise SymbolPlanError('\n'.join(lines))
-
- def get_symbol(self, symbol, stats):
- if (self.project_id is not None
- and self.project_id != stats.lod.project.id):
- return symbol
-
- elif isinstance(symbol, Trunk):
- return symbol
-
- elif self.symbol_name == stats.lod.name:
- if self.conversion is not None:
- symbol = self.conversion(symbol)
-
- if self.parent_lod_name is None:
- pass
- elif self.parent_lod_name == '.trunk.':
- symbol.preferred_parent_id = stats.lod.project.trunk_id
- else:
- symbol.preferred_parent_id = self._get_parent_by_id(
- self.parent_lod_name, stats
- ).id
-
- if self.svn_path is not None:
- symbol.base_path = self.svn_path
-
- return symbol
-
-
-class SymbolHintsFileRule(StrategyRule):
- """Use manual symbol configurations read from a file.
-
- The input file is line-oriented with the following format:
-
- <project-id> <symbol-name> <conversion> [<svn-path> [<parent-lod-name>]]
-
- Where the fields are separated by whitespace and
-
- project-id -- the numerical id of the Project to which the
- symbol belongs (numbered starting with 0). This field can
- be '.' if the rule is not project-specific.
-
- symbol-name -- the name of the symbol being specified, or
- '.trunk.' if the rule should apply to trunk.
-
- conversion -- how the symbol should be treated in the
- conversion. This is one of the following values: 'branch',
- 'tag', or 'exclude'. This field can be '.' if the rule
- shouldn't affect how the symbol is treated in the
- conversion.
-
- svn-path -- the SVN path that should serve as the root path of
- this LOD. The path should be expressed as a path relative
- to the SVN root directory, with or without a leading '/'.
- This field can be omitted or '.' if the rule shouldn't
- affect the LOD's SVN path.
-
- parent-lod-name -- the name of the LOD that should serve as this
- symbol's parent. This field can be omitted or '.' if the
- rule shouldn't affect the symbol's parent, or it can be
- '.trunk.' to indicate that the symbol should sprout from the
- project's trunk."""
-
- comment_re = re.compile(r'^(\#|$)')
-
- conversion_map = {
- 'branch' : convert_as_branch,
- 'tag' : convert_as_tag,
- 'exclude' : exclude,
- '.' : None,
- }
-
- def __init__(self, filename):
- self.filename = filename
-
- def start(self, symbol_statistics):
- self._rules = []
-
- f = open(self.filename, 'r')
- for l in f:
- l = l.rstrip()
- s = l.lstrip()
- if self.comment_re.match(s):
- continue
- fields = s.split()
-
- if len(fields) < 3:
- raise FatalError(
- 'The following line in "%s" cannot be parsed:\n "%s"'
- % (self.filename, l,)
- )
-
- project_id = fields.pop(0)
- symbol_name = fields.pop(0)
- conversion = fields.pop(0)
-
- if fields:
- svn_path = fields.pop(0)
- if svn_path == '.':
- svn_path = None
- elif svn_path[0] == '/':
- svn_path = svn_path[1:]
- else:
- svn_path = None
-
- if fields:
- parent_lod_name = fields.pop(0)
- else:
- parent_lod_name = '.'
-
- if fields:
- raise FatalError(
- 'The following line in "%s" cannot be parsed:\n "%s"'
- % (self.filename, l,)
- )
-
- if project_id == '.':
- project_id = None
- else:
- try:
- project_id = int(project_id)
- except ValueError:
- raise FatalError(
- 'Illegal project_id in the following line:\n "%s"' % (l,)
- )
-
- if symbol_name == '.trunk.':
- if conversion not in ['.', 'trunk']:
- raise FatalError('Trunk cannot be converted as a different type')
-
- if parent_lod_name != '.':
- raise FatalError('Trunk\'s parent cannot be set')
-
- if svn_path is None:
- # This rule doesn't do anything:
- pass
- else:
- self._rules.append(ManualTrunkRule(project_id, svn_path))
-
- else:
- try:
- conversion = self.conversion_map[conversion]
- except KeyError:
- raise FatalError(
- 'Illegal conversion in the following line:\n "%s"' % (l,)
- )
-
- if parent_lod_name == '.':
- parent_lod_name = None
-
- if conversion is None \
- and svn_path is None \
- and parent_lod_name is None:
- # There is nothing to be done:
- pass
- else:
- self._rules.append(
- ManualSymbolRule(
- project_id, symbol_name,
- conversion, svn_path, parent_lod_name
- )
- )
-
- for rule in self._rules:
- rule.start(symbol_statistics)
-
- def get_symbol(self, symbol, stats):
- for rule in self._rules:
- symbol = rule.get_symbol(symbol, stats)
-
- return symbol
-
- def finish(self):
- for rule in self._rules:
- rule.finish()
-
- del self._rules
-
-
diff --git a/cvs2svn_lib/symbol_transform.py b/cvs2svn_lib/symbol_transform.py
deleted file mode 100644
index a4995b8..0000000
--- a/cvs2svn_lib/symbol_transform.py
+++ /dev/null
@@ -1,236 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2006-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains classes to transform symbol names."""
-
-
-import os
-import re
-
-from cvs2svn_lib.log import Log
-from cvs2svn_lib.common import FatalError
-from cvs2svn_lib.common import IllegalSVNPathError
-from cvs2svn_lib.common import normalize_svn_path
-
-
-class SymbolTransform:
- """Transform symbol names arbitrarily."""
-
- def transform(self, cvs_file, symbol_name, revision):
- """Possibly transform SYMBOL_NAME, which was found in CVS_FILE.
-
- Return the transformed symbol name. If this SymbolTransform
- doesn't apply, return the original SYMBOL_NAME. If this symbol
- should be ignored entirely, return None. (Please note that
- ignoring a branch via this mechanism only causes the branch *name*
- to be ignored; the branch contents will still be converted.
- Usually branches should be excluded using --exclude.)
-
- REVISION contains the CVS revision number to which the symbol was
- attached in the file as a string (with zeros removed).
-
- This method is free to use the information in CVS_FILE (including
- CVS_FILE.project) to decide whether and/or how to transform
- SYMBOL_NAME."""
-
- raise NotImplementedError()
-
-
-class ReplaceSubstringsSymbolTransform(SymbolTransform):
- """Replace specific substrings in symbol names.
-
- If the substring occurs multiple times, replace all copies."""
-
- def __init__(self, old, new):
- self.old = old
- self.new = new
-
- def transform(self, cvs_file, symbol_name, revision):
- return symbol_name.replace(self.old, self.new)
-
-
-class NormalizePathsSymbolTransform(SymbolTransform):
- def transform(self, cvs_file, symbol_name, revision):
- try:
- return normalize_svn_path(symbol_name)
- except IllegalSVNPathError, e:
- raise FatalError('Problem with %s: %s' % (symbol_name, e,))
-
-
-class CompoundSymbolTransform(SymbolTransform):
- """A SymbolTransform that applies other SymbolTransforms in series.
-
- Each of the contained SymbolTransforms is applied, one after the
- other. If any of them returns None, then None is returned (the
- following SymbolTransforms are ignored)."""
-
- def __init__(self, symbol_transforms):
- """Ininitialize a CompoundSymbolTransform.
-
- SYMBOL_TRANSFORMS is an iterable of SymbolTransform instances."""
-
- self.symbol_transforms = list(symbol_transforms)
-
- def transform(self, cvs_file, symbol_name, revision):
- for symbol_transform in self.symbol_transforms:
- symbol_name = symbol_transform.transform(
- cvs_file, symbol_name, revision
- )
- if symbol_name is None:
- # Don't continue with other symbol transforms:
- break
-
- return symbol_name
-
-
-class RegexpSymbolTransform(SymbolTransform):
- """Transform symbols by using a regexp textual substitution."""
-
- def __init__(self, pattern, replacement):
- """Create a SymbolTransform that transforms symbols matching PATTERN.
-
- PATTERN is a regular expression that should match the whole symbol
- name. REPLACEMENT is the replacement text, which may include
- patterns like r'\1' or r'\g<1>' or r'\g<name>' (where 'name' is a
- reference to a named substring in the pattern of the form
- r'(?P<name>...)')."""
-
- self.pattern = re.compile('^' + pattern + '$')
- self.replacement = replacement
-
- def transform(self, cvs_file, symbol_name, revision):
- return self.pattern.sub(self.replacement, symbol_name)
-
-
-class SymbolMapper(SymbolTransform):
- """A SymbolTransform that transforms specific symbol definitions.
-
- The user has to specify the exact CVS filename, symbol name, and
- revision number to be transformed, and the new name (or None if the
- symbol should be ignored). The mappings can be set via a
- constructor argument or by calling __setitem__()."""
-
- def __init__(self, items=[]):
- """Initialize the mapper.
-
- ITEMS is a list of tuples (cvs_filename, symbol_name, revision,
- new_name) which will be set as mappings."""
-
- # A map {(cvs_filename, symbol_name, revision) : new_name}:
- self._map = {}
-
- for (cvs_filename, symbol_name, revision, new_name) in items:
- self[cvs_filename, symbol_name, revision] = new_name
-
- def __setitem__(self, (cvs_filename, symbol_name, revision), new_name):
- """Set a mapping for a particular file, symbol, and revision."""
-
- cvs_filename = os.path.normcase(os.path.normpath(cvs_filename))
- key = (cvs_filename, symbol_name, revision)
- if key in self._map:
- Log().warn(
- 'Overwriting symbol transform for\n'
- ' filename=%r symbol=%s revision=%s'
- % (cvs_filename, symbol_name, revision,)
- )
- self._map[key] = new_name
-
- def transform(self, cvs_file, symbol_name, revision):
- cvs_filename = os.path.normcase(os.path.normpath(cvs_file.filename))
- return self._map.get(
- (cvs_filename, symbol_name, revision), symbol_name
- )
-
-
-class SubtreeSymbolMapper(SymbolTransform):
- """A SymbolTransform that transforms symbols within a whole repo subtree.
-
- The user has to specify a CVS repository path (a filename or
- directory) and the original symbol name. All symbols under that
- path will be renamed to the specified new name (which can be None if
- the symbol should be ignored). The mappings can be set via a
- constructor argument or by calling __setitem__(). Only the most
- specific rule is applied."""
-
- def __init__(self, items=[]):
- """Initialize the mapper.
-
- ITEMS is a list of tuples (cvs_path, symbol_name, new_name)
- which will be set as mappings. cvs_path is a string naming a
- directory within the CVS repository."""
-
- # A map {symbol_name : {cvs_path : new_name}}:
- self._map = {}
-
- for (cvs_path, symbol_name, new_name) in items:
- self[cvs_path, symbol_name] = new_name
-
- def __setitem__(self, (cvs_path, symbol_name), new_name):
- """Set a mapping for a particular file and symbol."""
-
- try:
- symbol_map = self._map[symbol_name]
- except KeyError:
- symbol_map = {}
- self._map[symbol_name] = symbol_map
-
- cvs_path = os.path.normcase(os.path.normpath(cvs_path))
- if cvs_path in symbol_map:
- Log().warn(
- 'Overwriting symbol transform for\n'
- ' directory=%r symbol=%s'
- % (cvs_path, symbol_name,)
- )
- symbol_map[cvs_path] = new_name
-
- def transform(self, cvs_file, symbol_name, revision):
- try:
- symbol_map = self._map[symbol_name]
- except KeyError:
- # No rules for that symbol name
- return symbol_name
-
- cvs_path = os.path.normcase(os.path.normpath(cvs_file.filename))
- while True:
- try:
- return symbol_map[cvs_path]
- except KeyError:
- new_cvs_path = os.path.dirname(cvs_path)
- if new_cvs_path == cvs_path:
- # No rules found for that path; return symbol name unaltered.
- return symbol_name
- else:
- cvs_path = new_cvs_path
-
-
-class IgnoreSymbolTransform(SymbolTransform):
- """Ignore symbols matching a specified regular expression."""
-
- def __init__(self, pattern):
- """Create an SymbolTransform that ignores symbols matching PATTERN.
-
- PATTERN is a regular expression that should match the whole symbol
- name."""
-
- self.pattern = re.compile('^' + pattern + '$')
-
- def transform(self, cvs_file, symbol_name, revision):
- if self.pattern.match(symbol_name):
- return None
- else:
- return symbol_name
-
-
diff --git a/cvs2svn_lib/time_range.py b/cvs2svn_lib/time_range.py
deleted file mode 100644
index f7dc234..0000000
--- a/cvs2svn_lib/time_range.py
+++ /dev/null
@@ -1,44 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2006-2008 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains a class to manage time ranges."""
-
-
-class TimeRange(object):
- __slots__ = ('t_min', 't_max')
-
- def __init__(self):
- # Start out with a t_min higher than any incoming time T, and a
- # t_max lower than any incoming T. This way the first T will push
- # t_min down to T, and t_max up to T, naturally (without any
- # special-casing), and successive times will then ratchet them
- # outward as appropriate.
- self.t_min = 1L<<32
- self.t_max = 0
-
- def add(self, timestamp):
- """Expand the range to encompass TIMESTAMP."""
-
- if timestamp < self.t_min:
- self.t_min = timestamp
- if timestamp > self.t_max:
- self.t_max = timestamp
-
- def __cmp__(self, other):
- # Sorted by t_max, and break ties using t_min.
- return cmp(self.t_max, other.t_max) or cmp(self.t_min, other.t_min)
-
-
diff --git a/cvs2svn_lib/version.py b/cvs2svn_lib/version.py
deleted file mode 100644
index 7900964..0000000
--- a/cvs2svn_lib/version.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env python2
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2007-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-# The version of cvs2svn:
-VERSION = '2.3.0'
-
-
-# If this file is run as a script, print the cvs2svn version number to
-# stdout:
-if __name__ == '__main__':
- print VERSION
-
-
diff --git a/cvs2svn_rcsparse/__init__.py b/cvs2svn_rcsparse/__init__.py
deleted file mode 100644
index 829c117..0000000
--- a/cvs2svn_rcsparse/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# -*-python-*-
-#
-# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved.
-#
-# By using this file, you agree to the terms and conditions set forth in
-# the LICENSE.html file which can be found at the top level of the ViewVC
-# distribution or at http://viewvc.org/license-1.html.
-#
-# For more information, visit http://viewvc.org/
-#
-# -----------------------------------------------------------------------
-
-"""This package provides parsing tools for RCS files."""
-
-from common import *
-
-try:
- from tparse import parse
-except ImportError:
- try:
- from texttools import Parser
- except ImportError:
- from default import Parser
-
- def parse(file, sink):
- return Parser().parse(file, sink)
diff --git a/cvs2svn_rcsparse/common.py b/cvs2svn_rcsparse/common.py
deleted file mode 100644
index 3eed600..0000000
--- a/cvs2svn_rcsparse/common.py
+++ /dev/null
@@ -1,324 +0,0 @@
-# -*-python-*-
-#
-# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved.
-#
-# By using this file, you agree to the terms and conditions set forth in
-# the LICENSE.html file which can be found at the top level of the ViewVC
-# distribution or at http://viewvc.org/license-1.html.
-#
-# For more information, visit http://viewvc.org/
-#
-# -----------------------------------------------------------------------
-
-"""common.py: common classes and functions for the RCS parsing tools."""
-
-import calendar
-import string
-
-class Sink:
- def set_head_revision(self, revision):
- pass
-
- def set_principal_branch(self, branch_name):
- pass
-
- def set_access(self, accessors):
- pass
-
- def define_tag(self, name, revision):
- pass
-
- def set_locker(self, revision, locker):
- pass
-
- def set_locking(self, mode):
- """Used to signal locking mode.
-
- Called with mode argument 'strict' if strict locking
- Not called when no locking used."""
-
- pass
-
- def set_comment(self, comment):
- pass
-
- def set_expansion(self, mode):
- pass
-
- def admin_completed(self):
- pass
-
- def define_revision(self, revision, timestamp, author, state,
- branches, next):
- pass
-
- def tree_completed(self):
- pass
-
- def set_description(self, description):
- pass
-
- def set_revision_info(self, revision, log, text):
- pass
-
- def parse_completed(self):
- pass
-
-
-# --------------------------------------------------------------------------
-#
-# EXCEPTIONS USED BY RCSPARSE
-#
-
-class RCSParseError(Exception):
- pass
-
-
-class RCSIllegalCharacter(RCSParseError):
- pass
-
-
-class RCSExpected(RCSParseError):
- def __init__(self, got, wanted):
- RCSParseError.__init__(
- self,
- 'Unexpected parsing error in RCS file.\n'
- 'Expected token: %s, but saw: %s'
- % (wanted, got)
- )
-
-
-class RCSStopParser(Exception):
- pass
-
-
-# --------------------------------------------------------------------------
-#
-# STANDARD TOKEN STREAM-BASED PARSER
-#
-
-class _Parser:
- stream_class = None # subclasses need to define this
-
- def _read_until_semicolon(self):
- """Read all tokens up to and including the next semicolon token.
-
- Return the tokens (not including the semicolon) as a list."""
-
- tokens = []
-
- while 1:
- token = self.ts.get()
- if token == ';':
- break
- tokens.append(token)
-
- return tokens
-
- def _parse_admin_head(self, token):
- rev = self.ts.get()
- if rev == ';':
- # The head revision is not specified. Just drop the semicolon
- # on the floor.
- pass
- else:
- self.sink.set_head_revision(rev)
- self.ts.match(';')
-
- def _parse_admin_branch(self, token):
- branch = self.ts.get()
- if branch != ';':
- self.sink.set_principal_branch(branch)
- self.ts.match(';')
-
- def _parse_admin_access(self, token):
- accessors = self._read_until_semicolon()
- if accessors:
- self.sink.set_access(accessors)
-
- def _parse_admin_symbols(self, token):
- while 1:
- tag_name = self.ts.get()
- if tag_name == ';':
- break
- self.ts.match(':')
- tag_rev = self.ts.get()
- self.sink.define_tag(tag_name, tag_rev)
-
- def _parse_admin_locks(self, token):
- while 1:
- locker = self.ts.get()
- if locker == ';':
- break
- self.ts.match(':')
- rev = self.ts.get()
- self.sink.set_locker(rev, locker)
-
- def _parse_admin_strict(self, token):
- self.sink.set_locking("strict")
- self.ts.match(';')
-
- def _parse_admin_comment(self, token):
- self.sink.set_comment(self.ts.get())
- self.ts.match(';')
-
- def _parse_admin_expand(self, token):
- expand_mode = self.ts.get()
- self.sink.set_expansion(expand_mode)
- self.ts.match(';')
-
- admin_token_map = {
- 'head' : _parse_admin_head,
- 'branch' : _parse_admin_branch,
- 'access' : _parse_admin_access,
- 'symbols' : _parse_admin_symbols,
- 'locks' : _parse_admin_locks,
- 'strict' : _parse_admin_strict,
- 'comment' : _parse_admin_comment,
- 'expand' : _parse_admin_expand,
- 'desc' : None,
- }
-
- def parse_rcs_admin(self):
- while 1:
- # Read initial token at beginning of line
- token = self.ts.get()
-
- try:
- f = self.admin_token_map[token]
- except KeyError:
- # We're done once we reach the description of the RCS tree
- if token[0] in string.digits:
- self.ts.unget(token)
- return
- else:
- # Chew up "newphrase"
- # warn("Unexpected RCS token: $token\n")
- pass
- else:
- if f is None:
- self.ts.unget(token)
- return
- else:
- f(self, token)
-
- def _parse_rcs_tree_entry(self, revision):
- # Parse date
- self.ts.match('date')
- date = self.ts.get()
- self.ts.match(';')
-
- # Convert date into timestamp
- date_fields = string.split(date, '.')
- # According to rcsfile(5): the year "contains just the last two
- # digits of the year for years from 1900 through 1999, and all the
- # digits of years thereafter".
- if len(date_fields[0]) == 2:
- date_fields[0] = '19' + date_fields[0]
- date_fields = map(string.atoi, date_fields)
- EPOCH = 1970
- if date_fields[0] < EPOCH:
- raise ValueError, 'invalid year'
- timestamp = calendar.timegm(tuple(date_fields) + (0, 0, 0,))
-
- # Parse author
- ### NOTE: authors containing whitespace are violations of the
- ### RCS specification. We are making an allowance here because
- ### CVSNT is known to produce these sorts of authors.
- self.ts.match('author')
- author = ' '.join(self._read_until_semicolon())
-
- # Parse state
- self.ts.match('state')
- state = ''
- while 1:
- token = self.ts.get()
- if token == ';':
- break
- state = state + token + ' '
- state = state[:-1] # toss the trailing space
-
- # Parse branches
- self.ts.match('branches')
- branches = self._read_until_semicolon()
-
- # Parse revision of next delta in chain
- self.ts.match('next')
- next = self.ts.get()
- if next == ';':
- next = None
- else:
- self.ts.match(';')
-
- # there are some files with extra tags in them. for example:
- # owner 640;
- # group 15;
- # permissions 644;
- # hardlinks @configure.in@;
- # this is "newphrase" in RCSFILE(5). we just want to skip over these.
- while 1:
- token = self.ts.get()
- if token == 'desc' or token[0] in string.digits:
- self.ts.unget(token)
- break
- # consume everything up to the semicolon
- self._read_until_semicolon()
-
- self.sink.define_revision(revision, timestamp, author, state, branches,
- next)
-
- def parse_rcs_tree(self):
- while 1:
- revision = self.ts.get()
-
- # End of RCS tree description ?
- if revision == 'desc':
- self.ts.unget(revision)
- return
-
- self._parse_rcs_tree_entry(revision)
-
- def parse_rcs_description(self):
- self.ts.match('desc')
- self.sink.set_description(self.ts.get())
-
- def parse_rcs_deltatext(self):
- while 1:
- revision = self.ts.get()
- if revision is None:
- # EOF
- break
- text, sym2, log, sym1 = self.ts.mget(4)
- if sym1 != 'log':
- print `text[:100], sym2[:100], log[:100], sym1[:100]`
- raise RCSExpected(sym1, 'log')
- if sym2 != 'text':
- raise RCSExpected(sym2, 'text')
- ### need to add code to chew up "newphrase"
- self.sink.set_revision_info(revision, log, text)
-
- def parse(self, file, sink):
- self.ts = self.stream_class(file)
- self.sink = sink
-
- self.parse_rcs_admin()
-
- # let sink know when the admin section has been completed
- self.sink.admin_completed()
-
- self.parse_rcs_tree()
-
- # many sinks want to know when the tree has been completed so they can
- # do some work to prep for the arrival of the deltatext
- self.sink.tree_completed()
-
- self.parse_rcs_description()
- self.parse_rcs_deltatext()
-
- # easiest for us to tell the sink it is done, rather than worry about
- # higher level software doing it.
- self.sink.parse_completed()
-
- self.ts = self.sink = None
-
-# --------------------------------------------------------------------------
diff --git a/cvs2svn_rcsparse/debug.py b/cvs2svn_rcsparse/debug.py
deleted file mode 100644
index cfeaf2b..0000000
--- a/cvs2svn_rcsparse/debug.py
+++ /dev/null
@@ -1,122 +0,0 @@
-# -*-python-*-
-#
-# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved.
-#
-# By using this file, you agree to the terms and conditions set forth in
-# the LICENSE.html file which can be found at the top level of the ViewVC
-# distribution or at http://viewvc.org/license-1.html.
-#
-# For more information, visit http://viewvc.org/
-#
-# -----------------------------------------------------------------------
-
-"""debug.py: various debugging tools for the rcsparse package."""
-
-import time
-
-from __init__ import parse
-import common
-
-
-class DebugSink(common.Sink):
- def set_head_revision(self, revision):
- print 'head:', revision
-
- def set_principal_branch(self, branch_name):
- print 'branch:', branch_name
-
- def define_tag(self, name, revision):
- print 'tag:', name, '=', revision
-
- def set_comment(self, comment):
- print 'comment:', comment
-
- def set_description(self, description):
- print 'description:', description
-
- def define_revision(self, revision, timestamp, author, state,
- branches, next):
- print 'revision:', revision
- print ' timestamp:', timestamp
- print ' author:', author
- print ' state:', state
- print ' branches:', branches
- print ' next:', next
-
- def set_revision_info(self, revision, log, text):
- print 'revision:', revision
- print ' log:', log
- print ' text:', text[:100], '...'
-
-
-class DumpSink(common.Sink):
- """Dump all the parse information directly to stdout.
-
- The output is relatively unformatted and untagged. It is intended as a
- raw dump of the data in the RCS file. A copy can be saved, then changes
- made to the parsing engine, then a comparison of the new output against
- the old output.
- """
- def __init__(self):
- global sha
- import sha
-
- def set_head_revision(self, revision):
- print revision
-
- def set_principal_branch(self, branch_name):
- print branch_name
-
- def define_tag(self, name, revision):
- print name, revision
-
- def set_comment(self, comment):
- print comment
-
- def set_description(self, description):
- print description
-
- def define_revision(self, revision, timestamp, author, state,
- branches, next):
- print revision, timestamp, author, state, branches, next
-
- def set_revision_info(self, revision, log, text):
- print revision, sha.new(log).hexdigest(), sha.new(text).hexdigest()
-
- def tree_completed(self):
- print 'tree_completed'
-
- def parse_completed(self):
- print 'parse_completed'
-
-
-def dump_file(fname):
- parse(open(fname, 'rb'), DumpSink())
-
-def time_file(fname):
- f = open(fname, 'rb')
- s = common.Sink()
- t = time.time()
- parse(f, s)
- t = time.time() - t
- print t
-
-def _usage():
- print 'This is normally a module for importing, but it has a couple'
- print 'features for testing as an executable script.'
- print 'USAGE: %s COMMAND filename,v' % sys.argv[0]
- print ' where COMMAND is one of:'
- print ' dump: filename is "dumped" to stdout'
- print ' time: filename is parsed with the time written to stdout'
- sys.exit(1)
-
-if __name__ == '__main__':
- import sys
- if len(sys.argv) != 3:
- _usage()
- if sys.argv[1] == 'dump':
- dump_file(sys.argv[2])
- elif sys.argv[1] == 'time':
- time_file(sys.argv[2])
- else:
- _usage()
diff --git a/cvs2svn_rcsparse/default.py b/cvs2svn_rcsparse/default.py
deleted file mode 100644
index 57f9fc6..0000000
--- a/cvs2svn_rcsparse/default.py
+++ /dev/null
@@ -1,172 +0,0 @@
-# -*-python-*-
-#
-# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved.
-#
-# By using this file, you agree to the terms and conditions set forth in
-# the LICENSE.html file which can be found at the top level of the ViewVC
-# distribution or at http://viewvc.org/license-1.html.
-#
-# For more information, visit http://viewvc.org/
-#
-# -----------------------------------------------------------------------
-#
-# This file was originally based on portions of the blame.py script by
-# Curt Hagenlocher.
-#
-# -----------------------------------------------------------------------
-
-import string
-import common
-
-class _TokenStream:
- token_term = frozenset(string.whitespace + ';:')
-
- # the algorithm is about the same speed for any CHUNK_SIZE chosen.
- # grab a good-sized chunk, but not too large to overwhelm memory.
- # note: we use a multiple of a standard block size
- CHUNK_SIZE = 192 * 512 # about 100k
-
-# CHUNK_SIZE = 5 # for debugging, make the function grind...
-
- def __init__(self, file):
- self.rcsfile = file
- self.idx = 0
- self.buf = self.rcsfile.read(self.CHUNK_SIZE)
- if self.buf == '':
- raise RuntimeError, 'EOF'
-
- def get(self):
- "Get the next token from the RCS file."
-
- # Note: we can afford to loop within Python, examining individual
- # characters. For the whitespace and tokens, the number of iterations
- # is typically quite small. Thus, a simple iterative loop will beat
- # out more complex solutions.
-
- buf = self.buf
- lbuf = len(buf)
- idx = self.idx
-
- while 1:
- if idx == lbuf:
- buf = self.rcsfile.read(self.CHUNK_SIZE)
- if buf == '':
- # signal EOF by returning None as the token
- del self.buf # so we fail if get() is called again
- return None
- lbuf = len(buf)
- idx = 0
-
- if buf[idx] not in string.whitespace:
- break
-
- idx = idx + 1
-
- if buf[idx] in ';:':
- self.buf = buf
- self.idx = idx + 1
- return buf[idx]
-
- if buf[idx] != '@':
- end = idx + 1
- token = ''
- while 1:
- # find token characters in the current buffer
- while end < lbuf and buf[end] not in self.token_term:
- end = end + 1
- token = token + buf[idx:end]
-
- if end < lbuf:
- # we stopped before the end, so we have a full token
- idx = end
- break
-
- # we stopped at the end of the buffer, so we may have a partial token
- buf = self.rcsfile.read(self.CHUNK_SIZE)
- lbuf = len(buf)
- idx = end = 0
-
- self.buf = buf
- self.idx = idx
- return token
-
- # a "string" which starts with the "@" character. we'll skip it when we
- # search for content.
- idx = idx + 1
-
- chunks = [ ]
-
- while 1:
- if idx == lbuf:
- idx = 0
- buf = self.rcsfile.read(self.CHUNK_SIZE)
- if buf == '':
- raise RuntimeError, 'EOF'
- lbuf = len(buf)
- i = string.find(buf, '@', idx)
- if i == -1:
- chunks.append(buf[idx:])
- idx = lbuf
- continue
- if i == lbuf - 1:
- chunks.append(buf[idx:i])
- idx = 0
- buf = '@' + self.rcsfile.read(self.CHUNK_SIZE)
- if buf == '@':
- raise RuntimeError, 'EOF'
- lbuf = len(buf)
- continue
- if buf[i + 1] == '@':
- chunks.append(buf[idx:i+1])
- idx = i + 2
- continue
-
- chunks.append(buf[idx:i])
-
- self.buf = buf
- self.idx = i + 1
-
- return ''.join(chunks)
-
-# _get = get
-# def get(self):
- token = self._get()
- print 'T:', `token`
- return token
-
- def match(self, match):
- "Try to match the next token from the input buffer."
-
- token = self.get()
- if token != match:
- raise common.RCSExpected(token, match)
-
- def unget(self, token):
- "Put this token back, for the next get() to return."
-
- # Override the class' .get method with a function which clears the
- # overridden method then returns the pushed token. Since this function
- # will not be looked up via the class mechanism, it should be a "normal"
- # function, meaning it won't have "self" automatically inserted.
- # Therefore, we need to pass both self and the token thru via defaults.
-
- # note: we don't put this into the input buffer because it may have been
- # @-unescaped already.
-
- def give_it_back(self=self, token=token):
- del self.get
- return token
-
- self.get = give_it_back
-
- def mget(self, count):
- "Return multiple tokens. 'next' is at the end."
- result = [ ]
- for i in range(count):
- result.append(self.get())
- result.reverse()
- return result
-
-
-class Parser(common._Parser):
- stream_class = _TokenStream
diff --git a/cvs2svn_rcsparse/parse_rcs_file.py b/cvs2svn_rcsparse/parse_rcs_file.py
deleted file mode 100644
index 215845d..0000000
--- a/cvs2svn_rcsparse/parse_rcs_file.py
+++ /dev/null
@@ -1,73 +0,0 @@
-#!/usr/bin/python2
-
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2006-2007 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""Parse an RCS file, showing the rcsparse callbacks that are called.
-
-This program is useful to see whether an RCS file has a problem (in
-the sense of not being parseable by rcsparse) and also to illuminate
-the correspondence between RCS file contents and rcsparse callbacks.
-
-The output of this program can also be considered to be a kind of
-'canonical' format for RCS files, at least in so far as rcsparse
-returns all relevant information in the file and provided that the
-order of callbacks is always the same."""
-
-
-import sys
-import os
-
-
-class Logger:
- def __init__(self, f, name):
- self.f = f
- self.name = name
-
- def __call__(self, *args):
- self.f.write(
- '%s(%s)\n' % (self.name, ', '.join(['%r' % arg for arg in args]),)
- )
-
-
-class LoggingSink:
- def __init__(self, f):
- self.f = f
-
- def __getattr__(self, name):
- return Logger(self.f, name)
-
-
-if __name__ == '__main__':
- # Since there is nontrivial logic in __init__.py, we have to import
- # parse() via that file. First make sure that the directory
- # containing this script is in the path:
- sys.path.insert(0, os.path.dirname(sys.argv[0]))
-
- from __init__ import parse
-
- if sys.argv[1:]:
- for path in sys.argv[1:]:
- if os.path.isfile(path) and path.endswith(',v'):
- parse(
- open(path, 'rb'), LoggingSink(sys.stdout)
- )
- else:
- sys.stderr.write('%r is being ignored.\n' % path)
- else:
- parse(sys.stdin, LoggingSink(sys.stdout))
-
-
diff --git a/cvs2svn_rcsparse/rcparse_redundant_work.patch b/cvs2svn_rcsparse/rcparse_redundant_work.patch
deleted file mode 100644
index b574dd2..0000000
--- a/cvs2svn_rcsparse/rcparse_redundant_work.patch
+++ /dev/null
@@ -1,99 +0,0 @@
-=== modified file 'cvs2svn_rcsparse/default.py'
---- cvs2svn_rcsparse/default.py 2007-11-18 23:05:32 +0000
-+++ cvs2svn_rcsparse/default.py 2010-01-23 10:21:47 +0000
-@@ -19,7 +19,7 @@
- import common
-
- class _TokenStream:
-- token_term = string.whitespace + ';:'
-+ token_term = frozenset(string.whitespace + ';:')
-
- # the algorithm is about the same speed for any CHUNK_SIZE chosen.
- # grab a good-sized chunk, but not too large to overwhelm memory.
-@@ -44,15 +44,17 @@
- # out more complex solutions.
-
- buf = self.buf
-+ lbuf = len(buf)
- idx = self.idx
-
- while 1:
-- if idx == len(buf):
-+ if idx == lbuf:
- buf = self.rcsfile.read(self.CHUNK_SIZE)
- if buf == '':
- # signal EOF by returning None as the token
- del self.buf # so we fail if get() is called again
- return None
-+ lbuf = len(buf)
- idx = 0
-
- if buf[idx] not in string.whitespace:
-@@ -60,7 +62,7 @@
-
- idx = idx + 1
-
-- if buf[idx] == ';' or buf[idx] == ':':
-+ if buf[idx] in ';:':
- self.buf = buf
- self.idx = idx + 1
- return buf[idx]
-@@ -70,17 +72,18 @@
- token = ''
- while 1:
- # find token characters in the current buffer
-- while end < len(buf) and buf[end] not in self.token_term:
-+ while end < lbuf and buf[end] not in self.token_term:
- end = end + 1
- token = token + buf[idx:end]
-
-- if end < len(buf):
-+ if end < lbuf:
- # we stopped before the end, so we have a full token
- idx = end
- break
-
- # we stopped at the end of the buffer, so we may have a partial token
- buf = self.rcsfile.read(self.CHUNK_SIZE)
-+ lbuf = len(buf)
- idx = end = 0
-
- self.buf = buf
-@@ -94,22 +97,24 @@
- chunks = [ ]
-
- while 1:
-- if idx == len(buf):
-+ if idx == lbuf:
- idx = 0
- buf = self.rcsfile.read(self.CHUNK_SIZE)
- if buf == '':
- raise RuntimeError, 'EOF'
-+ lbuf = len(buf)
- i = string.find(buf, '@', idx)
- if i == -1:
- chunks.append(buf[idx:])
-- idx = len(buf)
-+ idx = lbuf
- continue
-- if i == len(buf) - 1:
-+ if i == lbuf - 1:
- chunks.append(buf[idx:i])
- idx = 0
- buf = '@' + self.rcsfile.read(self.CHUNK_SIZE)
- if buf == '@':
- raise RuntimeError, 'EOF'
-+ lbuf = len(buf)
- continue
- if buf[i + 1] == '@':
- chunks.append(buf[idx:i+1])
-@@ -121,7 +126,7 @@
- self.buf = buf
- self.idx = i + 1
-
-- return string.join(chunks, '')
-+ return ''.join(chunks)
-
- # _get = get
- # def get(self):
-
diff --git a/cvs2svn_rcsparse/run-tests.py b/cvs2svn_rcsparse/run-tests.py
deleted file mode 100644
index eb9c3ea..0000000
--- a/cvs2svn_rcsparse/run-tests.py
+++ /dev/null
@@ -1,73 +0,0 @@
-#!/usr/bin/python2
-
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2007 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://viewvc.tigris.org/.
-# ====================================================================
-
-"""Run tests of rcsparse code."""
-
-import sys
-import os
-import glob
-from cStringIO import StringIO
-from difflib import Differ
-
-# Since there is nontrivial logic in __init__.py, we have to import
-# parse() via that file. First make sure that the directory
-# containing this script is in the path:
-script_dir = os.path.dirname(sys.argv[0])
-sys.path.insert(0, script_dir)
-
-from __init__ import parse
-from parse_rcs_file import LoggingSink
-
-
-test_dir = os.path.join(script_dir, 'test-data')
-
-filelist = glob.glob(os.path.join(test_dir, '*,v'))
-filelist.sort()
-
-all_tests_ok = 1
-
-for filename in filelist:
- sys.stderr.write('%s: ' % (filename,))
- f = StringIO()
- try:
- parse(open(filename, 'rb'), LoggingSink(f))
- except Exception, e:
- sys.stderr.write('Error parsing file: %s!\n' % (e,))
- all_tests_ok = 0
- else:
- output = f.getvalue()
-
- expected_output_filename = filename[:-2] + '.out'
- expected_output = open(expected_output_filename, 'rb').read()
-
- if output == expected_output:
- sys.stderr.write('OK\n')
- else:
- sys.stderr.write('Output does not match expected output!\n')
- differ = Differ()
- for diffline in differ.compare(
- expected_output.splitlines(1), output.splitlines(1)
- ):
- sys.stderr.write(diffline)
- all_tests_ok = 0
-
-if all_tests_ok:
- sys.exit(0)
-else:
- sys.exit(1)
-
diff --git a/cvs2svn_rcsparse/texttools.py b/cvs2svn_rcsparse/texttools.py
deleted file mode 100644
index 7c713eb..0000000
--- a/cvs2svn_rcsparse/texttools.py
+++ /dev/null
@@ -1,348 +0,0 @@
-# -*-python-*-
-#
-# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved.
-#
-# By using this file, you agree to the terms and conditions set forth in
-# the LICENSE.html file which can be found at the top level of the ViewVC
-# distribution or at http://viewvc.org/license-1.html.
-#
-# For more information, visit http://viewvc.org/
-#
-# -----------------------------------------------------------------------
-
-import string
-
-# note: this will raise an ImportError if it isn't available. the rcsparse
-# package will recognize this and switch over to the default parser.
-from mx import TextTools
-
-import common
-
-
-# for convenience
-_tt = TextTools
-
-_idchar_list = map(chr, range(33, 127)) + map(chr, range(160, 256))
-_idchar_list.remove('$')
-_idchar_list.remove(',')
-#_idchar_list.remove('.') # leave as part of 'num' symbol
-_idchar_list.remove(':')
-_idchar_list.remove(';')
-_idchar_list.remove('@')
-_idchar = string.join(_idchar_list, '')
-_idchar_set = _tt.set(_idchar)
-
-_onechar_token_set = _tt.set(':;')
-
-_not_at_set = _tt.invset('@')
-
-_T_TOKEN = 30
-_T_STRING_START = 40
-_T_STRING_SPAN = 60
-_T_STRING_END = 70
-
-_E_COMPLETE = 100 # ended on a complete token
-_E_TOKEN = 110 # ended mid-token
-_E_STRING_SPAN = 130 # ended within a string
-_E_STRING_END = 140 # ended with string-end ('@') (could be mid-@@)
-
-_SUCCESS = +100
-
-_EOF = 'EOF'
-_CONTINUE = 'CONTINUE'
-_UNUSED = 'UNUSED'
-
-
-# continuation of a token over a chunk boundary
-_c_token_table = (
- (_T_TOKEN, _tt.AllInSet, _idchar_set),
- )
-
-class _mxTokenStream:
-
- # the algorithm is about the same speed for any CHUNK_SIZE chosen.
- # grab a good-sized chunk, but not too large to overwhelm memory.
- # note: we use a multiple of a standard block size
- CHUNK_SIZE = 192 * 512 # about 100k
-
-# CHUNK_SIZE = 5 # for debugging, make the function grind...
-
- def __init__(self, file):
- self.rcsfile = file
- self.tokens = [ ]
- self.partial = None
-
- self.string_end = None
-
- def _parse_chunk(self, buf, start=0):
- "Get the next token from the RCS file."
-
- buflen = len(buf)
-
- assert start < buflen
-
- # construct a tag table which refers to the buffer we need to parse.
- table = (
- #1: ignore whitespace. with or without whitespace, move to the next rule.
- (None, _tt.AllInSet, _tt.whitespace_set, +1),
-
- #2
- (_E_COMPLETE, _tt.EOF + _tt.AppendTagobj, _tt.Here, +1, _SUCCESS),
-
- #3: accumulate token text and exit, or move to the next rule.
- (_UNUSED, _tt.AllInSet + _tt.AppendMatch, _idchar_set, +2),
-
- #4
- (_E_TOKEN, _tt.EOF + _tt.AppendTagobj, _tt.Here, -3, _SUCCESS),
-
- #5: single character tokens exit immediately, or move to the next rule
- (_UNUSED, _tt.IsInSet + _tt.AppendMatch, _onechar_token_set, +2),
-
- #6
- (_E_COMPLETE, _tt.EOF + _tt.AppendTagobj, _tt.Here, -5, _SUCCESS),
-
- #7: if this isn't an '@' symbol, then we have a syntax error (go to a
- # negative index to indicate that condition). otherwise, suck it up
- # and move to the next rule.
- (_T_STRING_START, _tt.Is + _tt.AppendTagobj, '@'),
-
- #8
- (None, _tt.Is, '@', +4, +1),
- #9
- (buf, _tt.Is, '@', +1, -1),
- #10
- (_T_STRING_END, _tt.Skip + _tt.AppendTagobj, 0, 0, +1),
- #11
- (_E_STRING_END, _tt.EOF + _tt.AppendTagobj, _tt.Here, -10, _SUCCESS),
-
- #12
- (_E_STRING_SPAN, _tt.EOF + _tt.AppendTagobj, _tt.Here, +1, _SUCCESS),
-
- #13: suck up everything that isn't an AT. go to next rule to look for EOF
- (buf, _tt.AllInSet, _not_at_set, 0, +1),
-
- #14: go back to look for double AT if we aren't at the end of the string
- (_E_STRING_SPAN, _tt.EOF + _tt.AppendTagobj, _tt.Here, -6, _SUCCESS),
- )
-
- # Fast, texttools may be, but it's somewhat lacking in clarity.
- # Here's an attempt to document the logic encoded in the table above:
- #
- # Flowchart:
- # _____
- # / /\
- # 1 -> 2 -> 3 -> 5 -> 7 -> 8 -> 9 -> 10 -> 11
- # | \/ \/ \/ /\ \/
- # \ 4 6 12 14 /
- # \_______/_____/ \ / /
- # \ 13 /
- # \__________________________________________/
- #
- # #1: Skip over any whitespace.
- # #2: If now EOF, exit with code _E_COMPLETE.
- # #3: If we have a series of characters in _idchar_set, then:
- # #4: Output them as a token, and go back to #1.
- # #5: If we have a character in _onechar_token_set, then:
- # #6: Output it as a token, and go back to #1.
- # #7: If we do not have an '@', then error.
- # If we do, then log a _T_STRING_START and continue.
- # #8: If we have another '@', continue on to #9. Otherwise:
- # #12: If now EOF, exit with code _E_STRING_SPAN.
- # #13: Record the slice up to the next '@' (or EOF).
- # #14: If now EOF, exit with code _E_STRING_SPAN.
- # Otherwise, go back to #8.
- # #9: If we have another '@', then we've just seen an escaped
- # (by doubling) '@' within an @-string. Record a slice including
- # just one '@' character, and jump back to #8.
- # Otherwise, we've *either* seen the terminating '@' of an @-string,
- # *or* we've seen one half of an escaped @@ sequence that just
- # happened to be split over a chunk boundary - in either case,
- # we continue on to #10.
- # #10: Log a _T_STRING_END.
- # #11: If now EOF, exit with _E_STRING_END. Otherwise, go back to #1.
-
- success, taglist, idx = _tt.tag(buf, table, start)
-
- if not success:
- ### need a better way to report this error
- raise common.RCSIllegalCharacter()
- assert idx == buflen
-
- # pop off the last item
- last_which = taglist.pop()
-
- i = 0
- tlen = len(taglist)
- while i < tlen:
- if taglist[i] == _T_STRING_START:
- j = i + 1
- while j < tlen:
- if taglist[j] == _T_STRING_END:
- s = _tt.join(taglist, '', i+1, j)
- del taglist[i:j]
- tlen = len(taglist)
- taglist[i] = s
- break
- j = j + 1
- else:
- assert last_which == _E_STRING_SPAN
- s = _tt.join(taglist, '', i+1)
- del taglist[i:]
- self.partial = (_T_STRING_SPAN, [ s ])
- break
- i = i + 1
-
- # figure out whether we have a partial last-token
- if last_which == _E_TOKEN:
- self.partial = (_T_TOKEN, [ taglist.pop() ])
- elif last_which == _E_COMPLETE:
- pass
- elif last_which == _E_STRING_SPAN:
- assert self.partial
- else:
- assert last_which == _E_STRING_END
- self.partial = (_T_STRING_END, [ taglist.pop() ])
-
- taglist.reverse()
- taglist.extend(self.tokens)
- self.tokens = taglist
-
- def _set_end(self, taglist, text, l, r, subtags):
- self.string_end = l
-
- def _handle_partial(self, buf):
- which, chunks = self.partial
- if which == _T_TOKEN:
- success, taglist, idx = _tt.tag(buf, _c_token_table)
- if not success:
- # The start of this buffer was not a token. So the end of the
- # prior buffer was a complete token.
- self.tokens.insert(0, string.join(chunks, ''))
- else:
- assert len(taglist) == 1 and taglist[0][0] == _T_TOKEN \
- and taglist[0][1] == 0 and taglist[0][2] == idx
- if idx == len(buf):
- #
- # The whole buffer was one huge token, so we may have a
- # partial token again.
- #
- # Note: this modifies the list of chunks in self.partial
- #
- chunks.append(buf)
-
- # consumed the whole buffer
- return len(buf)
-
- # got the rest of the token.
- chunks.append(buf[:idx])
- self.tokens.insert(0, string.join(chunks, ''))
-
- # no more partial token
- self.partial = None
-
- return idx
-
- if which == _T_STRING_END:
- if buf[0] != '@':
- self.tokens.insert(0, string.join(chunks, ''))
- return 0
- chunks.append('@')
- start = 1
- else:
- start = 0
-
- self.string_end = None
- string_table = (
- (None, _tt.Is, '@', +3, +1),
- (_UNUSED, _tt.Is + _tt.AppendMatch, '@', +1, -1),
- (self._set_end, _tt.Skip + _tt.CallTag, 0, 0, _SUCCESS),
-
- (None, _tt.EOF, _tt.Here, +1, _SUCCESS),
-
- # suck up everything that isn't an AT. move to next rule to look
- # for EOF
- (_UNUSED, _tt.AllInSet + _tt.AppendMatch, _not_at_set, 0, +1),
-
- # go back to look for double AT if we aren't at the end of the string
- (None, _tt.EOF, _tt.Here, -5, _SUCCESS),
- )
-
- success, unused, idx = _tt.tag(buf, string_table,
- start, len(buf), chunks)
-
- # must have matched at least one item
- assert success
-
- if self.string_end is None:
- assert idx == len(buf)
- self.partial = (_T_STRING_SPAN, chunks)
- elif self.string_end < len(buf):
- self.partial = None
- self.tokens.insert(0, string.join(chunks, ''))
- else:
- self.partial = (_T_STRING_END, chunks)
-
- return idx
-
- def _parse_more(self):
- buf = self.rcsfile.read(self.CHUNK_SIZE)
- if not buf:
- return _EOF
-
- if self.partial:
- idx = self._handle_partial(buf)
- if idx is None:
- return _CONTINUE
- if idx < len(buf):
- self._parse_chunk(buf, idx)
- else:
- self._parse_chunk(buf)
-
- return _CONTINUE
-
- def get(self):
- try:
- return self.tokens.pop()
- except IndexError:
- pass
-
- while not self.tokens:
- action = self._parse_more()
- if action == _EOF:
- return None
-
- return self.tokens.pop()
-
-
-# _get = get
-# def get(self):
- token = self._get()
- print 'T:', `token`
- return token
-
- def match(self, match):
- if self.tokens:
- token = self.tokens.pop()
- else:
- token = self.get()
-
- if token != match:
- raise common.RCSExpected(token, match)
-
- def unget(self, token):
- self.tokens.append(token)
-
- def mget(self, count):
- "Return multiple tokens. 'next' is at the end."
- while len(self.tokens) < count:
- action = self._parse_more()
- if action == _EOF:
- ### fix this
- raise RuntimeError, 'EOF hit while expecting tokens'
- result = self.tokens[-count:]
- del self.tokens[-count:]
- return result
-
-
-class Parser(common._Parser):
- stream_class = _mxTokenStream