aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrian Harring <ferringb@gentoo.org>2005-07-10 09:23:46 +0000
committerBrian Harring <ferringb@gentoo.org>2005-07-10 09:23:46 +0000
commit713ff68875b8bc5acafa51d52195dd2a74606947 (patch)
tree6cc368966cfcff5c5d6e82384128304f6572bdc7 /rewrite-misc
parentwhee. import of embryonic portage rewrite. (diff)
downloadportage-cvs-713ff68875b8bc5acafa51d52195dd2a74606947.tar.gz
portage-cvs-713ff68875b8bc5acafa51d52195dd2a74606947.tar.bz2
portage-cvs-713ff68875b8bc5acafa51d52195dd2a74606947.zip
plop.
Diffstat (limited to 'rewrite-misc')
-rw-r--r--rewrite-misc/ChangeLog16
-rw-r--r--rewrite-misc/HORKED32
-rw-r--r--rewrite-misc/conf_default_types132
-rw-r--r--rewrite-misc/config45
-rw-r--r--rewrite-misc/config.txt155
-rw-r--r--rewrite-misc/default.config40
-rw-r--r--rewrite-misc/example-complex.config55
-rw-r--r--rewrite-misc/intro417
-rw-r--r--rewrite-misc/layout.txt273
9 files changed, 1165 insertions, 0 deletions
diff --git a/rewrite-misc/ChangeLog b/rewrite-misc/ChangeLog
new file mode 100644
index 0000000..46c62ae
--- /dev/null
+++ b/rewrite-misc/ChangeLog
@@ -0,0 +1,16 @@
+06/07/05: harring: config.{repo,cache,domain,sync} works fine, new dict type. portage.util.dicts holds the dict variants, including
+ IndexableSequence. added portage.util.currying. Use it. it rocks.
+06/07/05: harring: config instantiation of repo works now. config.repo is a bit screwed up, but will be fixed.
+06/06/05: harring: ebuild depends works now. config instantiation should work for base configurations, but nothing complex.
+ cache and location basically, nothing like package.* wrappers (which will be via a config callable)
+06/03/05: harring: changelog addition. portage.ebuild.processor work namely, eclass_cache inclusive.
+More additions to intro, specifically re: avoiding race condition of gpg verification then using a file with open perms
+(short version, abuse preloaded_eclass type trick).
+Yanked *ROOTPATH. it's a kludge, a bad one at that. Will break uclibc profile, but it's a kludge to cover up portages shitty path handling.
+portage.ebuild.processor.processor.generic_handler is of note, generic waiting loop structure. basically, any ebd interaction after setup
+is handled here, with command:callable dict handed in, etc.
+ebuild sourcing for metadata should be finished end of day, since I broke it all apart via namespace and killing off old ebuild.py.
+ebuild*.sh no longer has /usr/lib/portage/bin/ hardcoded in, if unset and not daemonize ebuild.sh sets it to the usual. ebd requests it
+from the processor, which returns portage.const.PORTAGE_BIN_PATH
+Broke inherit into two funcs, basically breaking the core of inherit out (the source call/semantics). defaults to old behaviour,
+but ebd will override it (look at eclass_cache.get_eclass_* and you'll understand, elucidated in the intro ebd section)
diff --git a/rewrite-misc/HORKED b/rewrite-misc/HORKED
new file mode 100644
index 0000000..d525812
--- /dev/null
+++ b/rewrite-misc/HORKED
@@ -0,0 +1,32 @@
+list what's horked, note your name by it if you want dibs.
+This ain't complete either.
+
+namespace fixes required, plus potentially a bit of rewrite
+portage.sync
+portage.transports # transports needs to be unified behind fetcher design.
+ # need a fetchable class also.
+
+not finished-
+portage.cache.multiplex # should be a master/slave class, master for querying, both for updates/deletions
+portage.restrictions
+portage.config.central # needs to be renamed, and needs new.instancemethod hacks, read intro for explanation.
+
+portage.vdb # consult harring if you're after this, since this is a helluva lot more complex than just a repo
+portage.binpkg # bind it to portage.ebuild wherever possible
+
+not started
+portage.config.domain # consult intro doc if you attempt it, and collect feedback from jason/brian regarding it please
+ # harring will want it one way which probably slightly conflicts with what jason wants (and vice versa),
+ # so be aware it's probably going to be a shifting class definition.
+ # still need a prototype though.
+
+
+not started
+portage.ebuild.repository.gpgtree
+# ebuild repository derivative, bind gpg verification within metadata creation. including manifest verification (not just signed).
+# need a design for having gpg trust mapped into repositories (overkill? really need multiple trust rings?)
+
+not even scratched out yet
+portage.*.fetchable # find a place for this. simple one however.
+portage.*.(merge|build)operation
+# no base class defined yet, see intro
diff --git a/rewrite-misc/conf_default_types b/rewrite-misc/conf_default_types
new file mode 100644
index 0000000..c8dfc13
--- /dev/null
+++ b/rewrite-misc/conf_default_types
@@ -0,0 +1,132 @@
+# so what the hell does this all mean? It's section type definitions.
+# defaults specifies the name of default settings, which must exist in the section definition.
+# required specifies what settings are required in any user config for that section.
+# incrementals specifies what settings are stacked, orig_setting + new_setting. not limited to list types only, but probably should be
+# list, str, bool are lists of settings, stating "this setting is of this type", so at runtime any config settings processed are
+# automatically converted to that type.
+# section_ref are a _special_ type of setting type. must reference another section, and can be multiple (specified via list).
+# not possible to do incrementals on section types, so don't attempt it (it will not act as you expect).
+# note defaults get passed through the setting type conversion mechanism also.
+# instantiate is the python namespace addie of a callable that does instantiation of that section (rather than a generic handler).
+# generic handler ignores any settings that aren't addressed/fscked with via the default mangling mentioned above
+# (change this later?)
+#
+# any defined section setting types that are in the config are verified to exist during config collapsing.
+# if a custom instantiation func is defined for that section type, the instantiation func is required to instantiate the sections
+# (config.instantiate_section(section) effectively). Otherwise, the generic handler does the instantiation and substitution for
+# callable instantiation on its own.
+#
+# positional specifies positional args (which all must be required), only actually used if generic instantiator is used.
+# label is a special setting name, akin to __name__ (literally, that's its value), *only* when stated as a default
+#
+# iow, if you have your own instantiation func for a section, you have to instantiate any section types you define. it's verified
+# to exist, but type is not verified (class type verification within specified callable should suffice however).
+#
+# custom instantiation func prototype (the actual instantiation handler):
+# def handler(config, callable, section_label, section_config)
+#
+# config == portage.config.central.config instance,
+# callable == class definition for that section,
+# section_label == section's label/name (hard one I know).
+# section_config == collapsed section's config as a dict.
+#
+#
+# potential extensions:
+#
+# types are limited to what the generic parser knows of. that should be extended.
+#
+# inter-section references should be handled (instantiation) via the generic parser also.
+# ex: instantiation of domain should be given instantiated repos/profiles instead of section names.
+#
+# there is no way to have defaults represented at the config level for specialized callables.
+# decide if this is good or bad. in effect, require's are generalized for a section type, which may not apply
+#
+# there isn't any mechanism for noting what supports interop (think sync + repo), aside from instantiating
+# and having it bitch. fluid design/intention of the final parsing of user config, but this is a drawback of
+# said design.
+# think about ways to curb that, lest users get too annoying with tracebacks ;)
+#
+# right now, there is no way to specify that a callable requires certain settings beyond section requirements.
+# there _should_ be an automated way to get that info, so that the generic parser can carry that load.
+#
+# this file also will be 'fun' for doing per keyword defaults as per the portage norm.
+# probably need to support exec within this file, with a default exec section pointing at
+# a location on the fs that defines further defaults, although that makes parsing this file fun.
+#
+# short version, with the rules established, you would need to define a metasection, and apply the
+# inherit/incrementals crap to constructing at run time the section definitions.
+# or you need a smarter parser/generator for this file.
+# leave that to the schmuck who writes the autotools integration for this
+# (why oh why do I suspect that's going to be my stupid ass? :)
+# ~harring
+#
+# ps. a dpkg repo (fex) _should_ be a different section type imo, and whatever checks are done for
+# assembling a domain should do isinstance checks on a repo base, rather than (potentially) enforcing
+# type restriction in the config specification (repositories = section label, and verifying that label's type)
+# or not. think about it...
+#
+# pps: any such isinstance check _must_ do the check after an instantiation attempt. remember that class is in reality
+# a callable, which can include functions that are _not_ classes, but return objects via whatever screwed up logic internal
+# to the func (think static factory func for accessing a singleton, although bad example since you can implement singleton
+# within the class definition via __new__, but I digress)...
+#
+# ppps: if it's required, state it. don't assume just cause it is a default, that not listing a setting in required is valid
+# it's anal, but it _will_ help to catch screwups in section definition changes.
+#
+# pppps: Fairly sure at some point the town folk will come around with the pitchforks, torches and that special glimmer in their eyes
+# if positional args for initialization isn't supported in someway (regardless of the fun of implementing it), so think about
+# possibilities for pulling it off (required ordering lifted from section def and/or callable?)
+
+
+[profile]
+incrementals = USE FEATURES ACCEPT_KEYWORDS ACCEPT_LICENSE CONFIG_PROTECT_MASK CONFIG_PROTECT PRELINK_PATH PRELINK_PATH_MASK
+list = USE FEATURES ACCEPT_KEYWORDS ACCEPT_LICENSE CONFIG_PROTECT_MASK CONFIG_PROTECT PRELINK_PATH PRELINK_PATH_MASK
+ package.keywords package.use package.unmask package.mask
+defaults = class
+class = portage.ebuild.profiles.oldstyle
+required = class
+
+[domain]
+instantiate = portage.config.parser.domain
+required = repositories profile root class
+section_ref = repositories profile
+pre = config repositories
+incrementals = USE FEATURES ACCEPT_KEYWORDS ACCEPT_LICENSE CONFIG_PROTECT_MASK CONFIG_PROTECT PRELINK_PATH PRELINK_PATH_MASK
+list = USE FEATURES ACCEPT_KEYWORDS ACCEPT_LICENSE CONFIG_PROTECT_MASK CONFIG_PROTECT PRELINK_PATH PRELINK_PATH_MASK
+ package.keywords package.use package.unmask package.mask repositories
+defaults = class root
+class = portage.config.domain
+root = /
+
+[repo]
+#instantiate = portage.config.instantiation.repo
+required = class location
+defaults = class
+class = portage.ebuild.repository
+section_ref = cache sync sync_cache
+#list = cache
+package.keywords = portage.config.handler.keywords
+package.mask = portage.config.handler.mask
+package.unmask = portage.config.handler.unmask
+
+[cache]
+required = class location label auxdbkeys
+defaults = class auxdbkeys location label
+class = portage.cache.flat_list.database
+auxdbkeys = %(metadata_keys)s
+location = %(cache_base_loc)s
+list = auxdbkeys
+positional = location label auxdbkeys
+
+[exec]
+autoexec = true
+
+[sync]
+required = class url
+defaults = class
+class = portage.sync.rsync
+
+[DEFAULT]
+metadata_keys = DEPEND RDEPEND SLOT SRC_URI RESTRICT HOMEPAGE LICENSE DESCRIPTION KEYWORDS INHERITED IUSE CDEPEND
+ PDEPEND PROVIDE
+cache_base_loc = /var/cache/edb/dep/
diff --git a/rewrite-misc/config b/rewrite-misc/config
new file mode 100644
index 0000000..2e6eabf
--- /dev/null
+++ b/rewrite-misc/config
@@ -0,0 +1,45 @@
+[/usr/portage]
+type = cache
+
+[rsync repo]
+type = repo
+class = portage.ebuild.repository
+location = /usr/portage
+cache = '/usr/portage'
+
+#[vdb]
+#type = repo
+#class = portage.installed_pkg.repository
+#location = /var/db/pkg
+
+[base config]
+type = config
+USE = nptl nptlonly -gnome -oss mad matroska faad mpg avi -arts mmx mmxext flac bidi dvd ssl sdl xvid 3dnow 3dnowext
+ sse sse2 mythtv rtc -svga -ipv6 win32codecs -esd
+DISTDIR = "/space/distfiles"
+FEATURES = confcache parallel-fetch
+#package.use = %(CONFDIR)/package.use
+#package.mask = %(CONFDIR)/package.mask
+#package.unmask = %(CONFDIR)/package.unmask
+#package.keywords = %(CONFDIR)/package.keywords
+
+[profile]
+type = profile
+base_repo = 'rsync repo'
+profile = default-linux/x86/2005.0
+
+[x86]
+CFLAGS = "-O2 -march=pentium4 -pipe"
+CHOST = "i686-pc-linux-gnu"
+profile = profile
+inherit = 'base config'
+ACCEPT_KEYWORDS = "x86"
+inherit = 'base config'
+
+[livefs domain]
+inherit = x86
+type = domain
+default = yes
+repositories = "rsync repo"
+#vdb
+
diff --git a/rewrite-misc/config.txt b/rewrite-misc/config.txt
new file mode 100644
index 0000000..ea360ed
--- /dev/null
+++ b/rewrite-misc/config.txt
@@ -0,0 +1,155 @@
+===NOTE===
+If you're _truly_ interested in what config does/supports currently, look at conf_default_types. lot of info there.
+This is kept for historical reasons, and cause it explains the idea/layout a bit more
+That said, it _is_ out of date (sorry, shit moves fast), config section no longer exists, it was merged into domain
+
+make.conf backwards compatibility bit still applies, and is pretty straightforward/easy to pull off actually.
+~harring
+===BACK TO YOUR PREVIOUSLY SCHEDULED PROGRAMMING===
+
+win.ini style format (yay).
+
+sections that define type are acted upon. no type definition, and the section is ignored unless explicitly inherited
+if type is defined, exempting configs type, class must be defined.
+extra options are handed off to the class for initialization
+
+repository for example,
+
+[rsync repo]
+type = repo
+class = portage.ebuild.repository
+location = /usr/portage
+
+each section is capable of specifying inherit targets. inherit's specify the base section to pull in, and override.
+can still access the original value via similar bash trick-
+CFLAGS = %(CFLAGS) -g
+for example.
+
+few words on each section (note the type of the section is declared explicitly, otherwise it's just a config 'group' that is only
+used by inherit's)
+
+[repo]
+ must specify class
+ REPO_LABEL is automatically defined for all cache instances.
+ frozen is a boolean, specifies if cache staleness is corrected, or errored out.
+ can (and is advisable for rsync) specify a sync_cache.
+ this cache is instantiated after a sync, the normal cache's cloned (contents) to it on sync.
+ if repo is sync'd, it must drop its assumptions about the current tree. In other words, you update it, it forgets what it knows,
+ and starts mapping things out again. Repo must be _totally_ live, no "pardon, reinstantiate it after syncing".
+ Shouldn't be hard via IndexableSequence; just add a method (forget?) that wipes any internal caches to the repo.
+ remote repo's, unless caching, shouldn't suffer this and should just set .forget to lambda : True
+
+[sync]
+ can only be bound to a repo.
+ must specify class
+
+[cache]
+ must specify class
+ REPO_LABEL is available; it's the repo 'label' (section, really) that the cache is associated with (repo specifies this)
+ if no path is specified, assumed path is %(CONFIG_PATH)/%(REPO_LABEL) (# change in behaviour, but not user visible)
+ can only be bound to a repo
+
+[config]
+ if a class is specified, the class must be a callable, and will be handed that sections config.
+ the config section that defines a class is removed, and the config(s?) returned by the callable are inserted into the global
+ config. returned types are marked as configs (eg, can't slip a domain in via this route).
+
+[domain]
+ config(s?) specified must be type=config
+ class is optional. if it's not specified, it's assumed to be a stand alone domain (think root="/", normal keywords).
+ if class is specified, it's passed the remaining config options, with the global config passed as the first arg (positional)
+ why allow class? cause it can be used to setup/specify interdomain dependencies, requiring the toolchain for ppc on an x86
+ system for example, or being abused for doing interdomain deps for chroot domains.
+
+ obviously that stuff needs to be worked out, but this 'hook' should allow it. fun fun.
+
+[exec]
+ the fun one.
+ post parsing the config file that holds a type=exec section, _all_ exec sections are one by one removed and executed.
+ valid 'commands' is
+ include = some-other-file
+
+ don't try including a file multiple times. cyclic detection will need to be implemented at some point.
+ if class is specified for an exec type, it's the path to a callable that returns a global level config, and the existing
+ global level config is updated with the returned config (iow, exec can override what's defined in the file)
+
+
+Instantiating repos/caches/syncs/etc occurs on demand, as needed, with the exception of exec sections.
+In other words, the config *could* have errors in it, but they won't be hit till the config is totally initialized.
+
+Secondary tool (simple bugger) that just requests all domains/repos from the config would handle this; would force full parsing
+of the config (including all package.*), and would chuck errors if encountered.
+Otherwise, for sanity/speed sake, config is executed/instantiated as needed determined by caller requests.
+
+
+What does the class see for instantiation?
+
+dependent on the type of the section. config parser knows to remove
+package.use, package.keywords, package.mask, package.unmask, and allowed_merges, which name file(s) that are parsed, and used for
+a visibility wrapper of the repo. Any slaving of repo's to a repo that defines visibility wrappers gets the wrapped repo, not the
+raw repo. All package.* are effectively location types, meaning they're (currently) file paths, with %(CONFIG_PATH)/ assumed
+That assumption may change however.
+
+remaining options after any mangling above are handed to the class type specified for a section.
+so portage.ebuild.repository.__init__ will get basedir="/usr/portage" for a dict arg. (Example above)
+
+
+allowed_merges file
+
+Specifies atoms that control what can be merged. Think of it as either the uber "you ain't merging this bubba" for vdb (not very useful),
+or, bit more useful, list of atoms that are binpkg'd, specifiable per merge_target repo. can't apply it to an ebuild repo, can
+apply it to a binpkg/rpm repo though.
+
+
+package.*, visibility wrappers.
+
+A repo class *also* can, and likely will define its own visibility wrappers, as will the config (ACCEPT_KEYWORDS).
+Minor design note; wrappers take away from repo.match's capabilities to hand off crap to a potentially faster resolver remotely
+(consider situation where the repo is a rdbms; visibility filter can be handed off to pl/sql funcs or massive where clause)
+
+determination of whether or not a repo defines its own global visibility filter is done via inspection of the repo class-
+repo.visibility_config
+If set to False, niadda, otherwise it is either a callable that returns, or is already, a config object.
+In other words, the repo on initialization is _raw_, callers must instantiate the visibility wrapper (this is a candidate for api wrapping)
+
+The wrapper is given an instantiated repository instance (or possibly a callable that returns it, this isn't set in stone however) as
+first arg,
+Wrapper should/will be smart enough to filter out restrictions that don't apply to that repo.
+
+possibly after category is mapped, drop restrictions that don't match categories. Debatable.
+Not debatable, filter out restrictions that are domain/repo specific, eg, don't hold onto a restriction for repo xyz when you're
+wrapping abc.
+
+visibility wrappers are *not* added if repo.allow_visibility_filtering exists, and is false.
+VDB doesn't need a visibility wrapper, and shouldn't be allowed wrapped. Config should/will know not to attempt this, but checks within
+the visibility wrapper should assert this also.
+
+
+profiles
+
+if profile is specified, creates repo visibility wrappers to work with it.
+implicit implication is that you can specify a profile per actual repository. not sure about this.
+can also specify it per config, and per domain.
+
+profile is specified per config. all sections can specify an 'inherit' target(s), which is a section to pull values from, and override.
+
+
+
+MAKE.CONF BACKWARDS COMPATIBILITY
+
+assumes /etc/make.profile points at a valid profile , which is used to define the profile for the config.
+make.conf is read, and converted into a config section, all of this is bound under a default domain with root="/".
+PORTDIR is removed and used for ebuild repo's location
+PORTDIR_OVERLAY is removed, and sections are created for each, slaving them to PORTDIR, creating a final repositorySet that binds them
+together.
+/etc/portage/package.* is used as a visibility wrapper on repositorySet.
+
+if FEATURES="binpkg" is defined, then a binpkg repository section is generated, and PKGDIR is removed and used as location for the
+repository.
+
+defaults are lifted from /usr/share/portage/defaults.config ; basically make.global, but in the new config format, and treated as a
+non-modifiable data file, and stored elsewhere
+
+Note that effectively make.conf's existence just serves to mangle defaults.config. it's a mapping of old options into new, with all
+unknown options being used as config fodder (literally, default config section gets 'em).
+
diff --git a/rewrite-misc/default.config b/rewrite-misc/default.config
new file mode 100644
index 0000000..d870dcc
--- /dev/null
+++ b/rewrite-misc/default.config
@@ -0,0 +1,40 @@
+#unused atm, bind it in somehow.
+
+[rsync cache]
+type = cache
+class = portage.cache.flat_list
+path = %(CACHE_PATH)
+
+[rsync repo]
+type = repo
+class = portage.ebuild.repository
+path = /usr/portage
+cache = 'rsync cache'
+
+[vdb]
+type = repo
+class = portage.installed_pkg.repository
+path =
+
+[x86]
+type = config
+USE="nptl nptlonly -gnome -oss mad matroska faad mpg avi -arts mmx mmxext flac bidi dvd ssl sdl xvid 3dnow 3dnowext
+sse sse2 mythtv rtc -svga -ipv6 win32codecs -esd"
+DISTDIR = "/space/distfiles"
+FEATURES = "confcache parallel-fetch"
+package.use = %(CONFDIR)/package.use
+package.mask = %(CONFDIR)/package.mask
+package.unmask = %(CONFDIR)/package.unmask
+package.keywords = %(CONFDIR)/package.keywords
+CFLAGS = "-O2 -march=pentium4 -pipe"
+CHOST = "i686-pc-linux-gnu"
+profile = "rsync repo/profiles/default-linux/x86/2005.0"
+inherit = 'base config'
+ACCEPT_KEYWORDS = "x86"
+
+[default domain]
+type = domain
+root = "/"
+repositories = 'rsync repo' vdb
+config = x86
+
diff --git a/rewrite-misc/example-complex.config b/rewrite-misc/example-complex.config
new file mode 100644
index 0000000..618297f
--- /dev/null
+++ b/rewrite-misc/example-complex.config
@@ -0,0 +1,55 @@
+# note this is no longer a valid config. just exists for information/example of domain flexibility/intentions.
+# ~harring
+
+[rsync cache]
+type = cache
+class = portage.cache.flat_list
+basedir = %(VDB_PATH)
+
+[rsync repo]
+type = repo
+class = portage.ebuild.repository
+basedir = /usr/portage
+cache = 'rsync cache'
+
+[vdb]
+type = repo
+class = portage.installed_pkg.repository
+
+[base config]
+type = config
+USE="nptl nptlonly -gnome -oss mad matroska faad mpg avi -arts mmx mmxext flac bidi dvd ssl sdl xvid 3dnow 3dnowext
+sse sse2 mythtv rtc -svga -ipv6 win32codecs -esd"
+DISTDIR = "/space/distfiles"
+FEATURES = "confcache parallel-fetch"
+package.use = %(CONFDIR)/package.use
+package.mask = %(CONFDIR)/package.mask
+package.unmask = %(CONFDIR)/package.unmask
+package.keywords = %(CONFDIR)/package.keywords
+
+[x86]
+type = config
+CFLAGS = "-O2 -march=pentium4 -pipe"
+CHOST = "i686-pc-linux-gnu"
+profile = "rsync repo/profiles/default-linux/x86/2005.0"
+inherit = 'base config'
+ACCEPT_KEYWORDS = "x86"
+
+[ppc config]
+type = config
+CFLAGS = "-O2 -march=ppc -pipe"
+CHOST = "darwin7.4"
+inherit = 'base config'
+ACCEPT_KEYWORDS = "ppc"
+
+[default domain]
+type = domain
+root = "/"
+repositories = 'rsync repo' vdb
+config = x86
+
+[ppc target]
+type = domain
+repositories = 'rsync repo' ppc-vdb
+config = 'ppc config'
+root = "/home/targets/ppc"
diff --git a/rewrite-misc/intro b/rewrite-misc/intro
new file mode 100644
index 0000000..06812a0
--- /dev/null
+++ b/rewrite-misc/intro
@@ -0,0 +1,417 @@
+e'yo. General description of layout/goals/info/etc, and semi sortta api.
+
+That and aggregator of random ass crazy quotes should people get bored.
+
+[DISCLAIMER]
+This ain't the code.
+In other words, the actual design/code may be radically different, and this document probably will trail any major
+overhauls of the design/code (speaking from past experience).
+
+Updates welcome, as are suggestions and questions- please dig through all documentations in the dir this doc is in however, since
+there is a lot of info (both current and historical) related to it. Collapsing info into this doc is attempted, but explanation of the
+full restriction protocol (fex) is a _lot_ of info, and original idea is from previous redesign err... designs.
+Short version, historical, but still relevant info for restriction is in layout.txt.
+Other subsystems/design choices have their basis quite likely from other docs in this directory, so do your homework please :)
+
+
+[FURTHER DISCLAIMER]
+If this is implemented, which is being attempted, this is going to break the hell out of backwards compatibility api wise.
+Writing a translation layer doesn't really seem possible either, since things are pretty radically different (imho)
+
+Sorry, but thems the breaks. Feel free to attempt it, but this docs author doesn't view it as possible/viable
+
+
+[Terminology]
+cp = category/package
+cpv = category/package-version
+ROOT = livefs merge point, fex /home/bharring/embedded/arm-target or more commonly, root=/
+vdb = /var/db/pkg , installed packages database.
+domain = combination of repositories, root, and build information (use flags, cflags, etc). config data + repositories effectively
+repository = tree's. ebuild tree (/usr/portage), binpkg tree, vdb tree, etc.
+protocol = python name for design/api. iter() fex, is a protocol; it calls .next() on the passed in sequence, or wraps the sequence...
+ hesitate to call it defined hook on a class/instance, but this (crappy) description should suffice.
+
+[General design/idea/approach]
+So... this round of "lets take another stab at gutting portage" seems to be proceeding, and not killed off by design flaws *again*
+(famous last words I assure you), but here's the general gist. All pythonic components installed by portage *must* be within the portage.* namespace.
+
+No more polluting python's namespace, plain and simple. Third party plugins to portage aren't bound by this however (their mess, not ours).
+
+API flows from the config definitions, *everything* internal is effectively the same. Basically, config crap gives you your starter objects
+which from there, you dig deeper into the innards as needed action wise.
+
+The general design is intended to heavily abuse OOP, something portage has lacked thus far aside from the cache subsystem (cache was
+pretty much the only subsystem that used inheritance rather than duplication). Further, delegation of actions down to components _must_
+be abided by, example being repo + cache interaction. repo does what it can, but for searching the cache, let the cache do it.
+Assume what you're delegating to knows what the hell it's doing, and probably can do its job better than some external caller (essentially).
+
+Actual configuration is pretty heavily redesigned. Look at conf_default_types (likely renamed at some point).
+It's the meta-definition of the config... sort of. Basically the global config class knows jack, is generic, and is configured by the
+conf_default_type. The intention is that the on disk configuration format is encapsulated, so it can be remote or completely different
+from the win.ini format advocated by harring (mainly cause it's easiest since a functional and mostly not completely fricking
+obnoxious parser exists already).
+
+Encapsulation, extensibility/modularity, delegation, and allowing parallelizing of development should be key focuses in
+implementing/refining this high level design doc.
+Realize parallelizing is a funky statement, but it's apt; work on the repo implementations can proceed without being held up by
+cache work, and vice versa. Config is the central hold up, but that's mainly cause it instantiates everything, so the protocol for it
+needs to be nailed down so that you know what positional/optional args your class/callable will be receiving (that said, positional/optional
+is configurable via section definitions too).
+
+Final comment re: design goals, defining chunks of callable code and plugging it into the framework is another bit of a goal.
+Think twisted, just not quite as prevalent (their needs/focus is much different from ours, twisted is the app, your code is the lib,
+vice versa for portage).
+
+Back to config. Here's general notion of config 'chunks' (these map out to run time objects)
+
+domain--------------\
+| |
+repository profile
+
+domain is configuration data, accept_(license|keywords), use, cflags, chost, features, etc.
+profile, dependent on the profile class you choose, is either bound to a repository, or to user defined location on disk
+(/etc/portage/profile fex). Domain knows to do incremental crap upon profile settings, lifting package.* crap for visibility
+wrappers for repositories also.
+
+repositories is pretty straightforward. portdir, binpkg, vdb, etc.
+
+Back to domain. Domains are your definition of pretty much what can be done. Can't do jack without a domain, period.
+Can have multiple domains also, and domains do *not* have to be local (remote domains being a different class type).
+Clarifying, think of 500 desktop boxes, and a master box that's responsible for managing them. Define an appropriate domain class,
+and appropriate repository classes, and have a config that holds the 500 domains (representing each box), and you can push updates out
+via standard api trickery. In other words, the magic is hidden away, just define remote classes that match defined class rules
+(preferably inheriting from the base class, since isinstance sanity checks will become the norm), and you could do
+emerge --domain some-remote-domain -u glsa
+on the master box. Emerge won't know it's doing remote crap. Portagelib won't even. It'll just load what you define in the config.
+
+Ambitious? Yeah, a bit. Thing to note, the remote class additions will exist outside of portage proper most likely. Develop the
+code needed in parallel to fleshing portage proper out.
+
+Meanwhile, the remote bit + multiple domains + class overrides in config definition is _explicitly_ for the reasons above. That
+and x-compile/embedded target building, which is a bit funkier.
+
+Currently, portage has DEPEND and RDEPEND. How do you know what needs be native to that box to build the package?
+Literally, how do you know which atoms, say the toolchain, must be native vs what package's headers/libs must exist to build it?
+We need an additional metadata key, BDEPEND (build depends).
+
+If you have BDEPEND, you know what actually is ran locally in building a package, vs what headers/libs are required.
+Subtle difference, but BDEPEND would allow (with a sophisticated depresolver) toolchain to be represented in deps, rather than
+the current unstated dep approach profiles allow.
+
+Aside from that, BDEPEND could be used for x-compile via inter-domain deps; a ppc target domain on a x86 box would require BDEPEND
+from the default domain (x86). So... that's useful.
+
+Not yet proposed to dev ml, and adds to developer workload slightly, although it's a worthwhile sell.
+
+
+[Config design]
+Portage thus far (<=2.0.51*) has had variable ROOT (livefs merge point), but no way to vary configuration data aside from via a buttload of
+env vars. Further, there has been only one repository allowed (overlays are just that, extensions of the 'master' repository).
+Addition of support of any new format is mildly insane due to hardcoding up the wing wang in the code, and extension/modification
+of existing formats (ebuild) has some issues (namely the doebuild block of code).
+
+Goal is to address all of this crap. Format agnosticism at the repository level is via an abstracted repository design that should
+supply generic inspection attributes to match other formats. Specialized searching is possible via match, thus extending the
+extensibility of the prototype repository design.
+
+Format agnosticism for building/merging is somewhat reliant on the repo, namely package abstraction, and abstraction of building/merging
+operations.
+
+On disk configuration for alternative formats is extensible via changing section types, and plugging them into the domain definition.
+
+Note alt. formats quite likely will never be implemented in portage proper, that's kind of the domain of portage addons. In other words,
+dpkg/rpm/whatever quite likely won't be worked on by portage developers, at least not in the near future (too many other things to do).
+
+The intention is to generalize the framework so it's possible for others to do so if they choose however.
+
+Why is this good? Ebuild format has issues, as does our profile implementation. At some point, alternative formats/non-backwards
+compatible tweaks to the formats (ebuild or profile) will occur, and then people will be quite happy that the framework is generalized
+(seriously, nothing is lost from a proper abstracted design, and flexibility/power is gained).
+
+
+[config's actions/operation]
+portage.config.load_config() is the entrance point, returns to you a config object (portage.config.central).
+This object gives you access to the user defined configs, although only interest/poking at it should be to get a domain object from it.
+
+domain object is instantiated by config object via user defined configuration (/etc/portage/config namely). domains hold instantiated
+repositories, bind profile + user prefs (use/accept_keywords) together, and _should_ simplify this data into somewhat user friendly methods.
+(define this better).
+
+Normal/default domain doesn't know about other domains, nor give a damn. Embedded targets are domains, and _will_ need to know about the
+livefs domain (root=/), so buildplan creation/handling may need to be bound into domains.
+
+
+[Objects/subsystems/stuff]
+So... this is general naming of pretty much top level view of things, stuff emerge would be interested in (and would fool with).
+hesitate to call it a general api, but it probably will be as such, exempting any abstraction layer/api over all of this
+(good luck on that one }:] ).
+
+
+:IndexableSequence:
+functions as a set and dict, with caching and on the fly querying of info.
+mentioned due to use in repository and other places... (it's a useful lil sucker)
+
+
+:global config object (from portage.config.load_config()):
+#simple bugger.
+.get_types() # get the section types.
+.get_object() # return instantiated section
+.list_objects(type) # iterable, given a type, yield section names of that type.
+# and, just to make it damn fun, you can also access types via (fex, domain being a type)
+.domains # IndexableSequence, iterating == section labels, index == instantiate and return that section type
+
+convenience function in portage.config.* (somewhere)
+default_domain(config_obj)
+returns instantiated domain object of the default domain from config_obj.
+Nothing incredibly fancy, finds default domain via config._cparser.defaults().get("domain"), or
+via iterating over config.domain, returning the first domain that is root="/"
+what does the cparser bit map out to in the config?
+[DEFAULT]
+domain = some-section-label
+#the iterating route sucks, and will be a bit slower. default approach is recommended.
+
+
+:domain object:
+# bit of debate on this one I expect.
+# any package.{mask,unmask,keywords} mangling is instantiating as a wrapper around repository instances upon domain instantiation.
+# code _should_ be smart and lift any package.{mask,unmask,keywords} wrappers from repository instances and collapse it, pointing
+# at the raw repo (basically don't have N wrappers, collapse it into a single wrapper). Not worth implementing until the wrapper is
+# a faster implementation than the current portage.repository.visibility hack though (currently O(N) for each pkg instance, N being
+# visibility restrictions/atoms). Once it's O(1), collapsing makes a bit more sense (can be done in parallel however).
+# a word on inter repository dependencies... simply put, if the repository only allows satisfying deps from the same repository,
+# the package instance's *DEPEND atom conversions should include that restriction. Same trickery for keeping ebuilds from depping on
+# rpm/dpkg (and vice versa).
+.repositories # in the air somewhat on this one. either indexablesequence, or a repositorySet.
+ # nice aspect of the latter is you can just use .match with appropriate restrictions. very simply interface
+ # imo, although should provide a way to pull individual repositories/labels of said repos from the set though.
+ # basically, mangle a .raw_repo indexablesequence type trick (hackish, but nail it down when reach that bridge)
+.configure_package() # the contentious one. either a package instance returned from repositories is configured already, or it's returned
+ # unconfigured, and passed through configure_package to get a configured_package.
+ # it's slightly uglier for the resolver to have to do the .configure_package route, but note that the configuration
+ # data _should not_ be bound to the repo (repo is just packages), it's bound to the domain. so... either that config
+ # data is shoved down into some repo wrapper, or handled at this level.
+ # that said, if it's at this level, it makes the .repositories.match() with use restrictions a bit funky in terms of
+ # the "is it configured?" special casing...
+ # debate and flesh this one out...
+
+
+:build plan creation:
+# Jason kindly chuck some details in here, lest harring makes a fool of himself trying to write out info on it himself...
+
+
+:sets:
+# Marius, kindly chuck in some details here. probably defined via user config and/or profile, although what's it define?
+# atoms/restrictions? itermatch might be useful for a true set
+
+
+:build/setup operation:
+(need a good name for this; dpkg/rpm/binpkg/ebuild's 'prepping' for livefs merge should all fall under this, with varying use of the hooks)
+.build() # do everything, calling all steps as needed
+.setup() # whatever tmp dirs required, create 'em.
+.req_files() # (fetchables, although not necessarily with url (restrict="fetch"...)
+.unpack() # guess.
+.configure() # unused till ebuild format version two (ya know, that overhaul we've been kicking around? :)
+.compile() # guess.
+.test() # guess.
+.install() # install to tmp location. may not be used dependant on the format.
+.finalize() # good to go. generate (jit?) contents/metadata attributes, or returns a finalized instance
+
+
+:repo change operation:
+# base class.
+.package # package instance of what the action is centering around.
+.start() # notify repo we're starting (locking mainly, although prerm/preinst hook also)
+.finish() # notify repo we're done.
+.run() # high level, calls whatever funcs needed. individual methods are mainly for ui, this is if you don't display
+ # "doing install now... done... doing remove now... done" stuff.
+
+:remove operation:
+# derivative of repo change operation
+.remove() # guess.
+.package # package instance of what's being yanked.
+
+
+:install operation:
+# derivative of repo change operation
+.package # what's being installed.
+.install() # install it baby.
+
+
+:merge operation:
+# derivative of repo remove and install (so it has .remove and .install, which must be called in .install and .remove order)
+.replacing # package instance of what's being replaced.
+.package # what's being installed
+
+
+:fetchables:
+# basically a dict of stuff jammed together, just via attribute access (think c struct equiv)
+.filename
+.url # tuple/list of url's.
+.chksums # dict of chksum:val
+
+
+:fetcher:
+# hey hey. take a guess.
+# worth noting, if fetchable lacks .chksums["size"], it'll wipe any existing file. if size exists, and existing file is bigger,
+# wipe file, and start anew, otherwise resume.
+# mirror expansion occurs here, also.
+.fetch(fetchable, verifier=None) # if verifier handed in, does verification.
+
+
+:verifier:
+# note this is basically lifted conceptually from mirror_dist. if wondering about the need/use of it, look at that source.
+verify() # handed a fetchable, either False or True
+
+
+:repository:
+# this should be format agnostic, and hide any remote bits of it.
+# this is general info for using it, not designing a repository class
+.mergable() # true/false. pass a pkg to it, and it reports whether it can merge that or not.
+.livefs # boolean, indicative of whether or not it's a livefs target- this is useful for resolver, shop it to other repos, binpkg fex
+ # prior to shopping it to the vdb for merging to the fs. Or merge to livefs, then binpkg it while continuing further building
+ # dependant on that package (ui app's choice really).
+.raw_repo # either it weakref's self, or non-weakref refs another repo. why is this useful? visibility wrappers...
+ # this gives ya a way to see if p.mask is blocking usable packages fex. useful for the UI, not too much for
+ # portagelib innards.
+.frozen # boolean. basically, does it account for things changing without it's knowledge, or does it not. frozen=True is faster
+ # for ebuild trees for example, single check for cache staleness. frozen=False is slower, and is what portage does now
+ # (meaning every lookup of a package, and instantiation of a package instance requires mtime checks for staleness)
+.categories # IndexableSequence, if iterated over, gives ya all categories, if getitem lookup, sub-category category lookups.
+ # think media/video/mplayer
+.packages # IndexableSequence, if iterated over, all package names. if getitem (with category as key), packages of that category.
+.versions # IndexableSequence, if iterated over, all cpvs. if getitem (with cat/pkg as key), versions for that cp
+.itermatch() # iterable, given an atom/restriction, yields matching package instances.
+.match() # def match(self, atom): return list(self.itermatch(atom)) # voila.
+.__iter__() # in other words, repository is iterable. yields package instances.
+.sync() # sync, if the repo swings that way.
+ # flesh it out a bit, possibly handing in/back ui object for getting updates...
+
+digressing for a moment...
+
+note you can group repositories together, think portdir + portdir_overlay1 + portdir_overlay2.
+Creation of a repositoryset basically would involve passing multiple instantiated repos, and depending on that class's semantics,
+it internally handles the stacking (right most positional arg repo overrides 2nd right most, ... overriding left most)
+So... stating it again/clearly if it ain't obvious, everything is configuration/instantiating of objects, chucked around/mangled by the
+portagelib framework.
+
+What _isn't_ obvious is that since a repository set gets handed instantiated repositories, each repo, _including_ the set instance, can
+and should be able to have its own cache (this is assuming it's ebuild repos through and through). Why?
+Cache data doesn't change for the most part exempting which repo a cpv is from, and the eclass stacking. Handled individually, a cache
+bound to portdir _should_ be valid for portdir alone, it shouldn't carry data that is a result of eclass stacking from another overlay +
+that portdir. That's the business of the repositoryset.
+Consequence of this is that the repositoryset needs to basically reach down into the repository it's wrapping, get the pkg data, _then_
+rerequest the keys from that ebuild with a different eclass stack.
+This would be a bit expensive, although once inherit is converted to a pythonic implementation (basically handing the path to the requested
+eclass down the pipes to ebuild*.sh), it should be possible to trigger a fork in the inherit, and note python side that multiple sets of
+metadata are going to be coming down the pipe. That should alleviate the cost a bit, but it also makes multiple levels of cache reflecting
+each repository instance a bit nastier to pull off till it's implemented.
+
+So... short version. Harring is a perfectionist, and says it should be this way. reality of the situation makes it a bit trickier.
+Anyone interested in attempting the mod, feel free, otherwise harring will take a crack at it since he's being anal about having it work
+in such a fashion.
+
+Or... could do thus. repo + cache as a layer, wrapped with a 'regen' layer that handles cache regeneration as required.
+Via that, would give the repositoryset a way to override and use it's own specialized class that ensures each repo gets what's proper
+for it's layer. Think raw_repo type trick.
+
+continuing on...
+
+
+:cache:
+# ebuild centric, although who knows (binpkg cache ain't insane ya know).
+# short version, it's functionally a dict, with sequence properties (iterating over all keys).
+.keys() # return every cpv/package in the db.
+.readonly # boolean. is it modifiable?
+.match() # flesh this out. either handed a metadata restriction (or set of 'em), or handed dict with equiv info (like the former).
+ # ebuild caches most likely *should* return mtime information alongside, although maybe dependant on readonly.
+ # purpose of this? Gives you a way to hand off metadata searching to the cache db, rather than the repo having to resort
+ # to pulling each cpv from the cache and doing the check itself.
+ # This is what will make rdbms cache backends finally stop sucking and start seriously rocking, properly implemented at least. :)
+ # clarification, you don't call this directly, repo.match delegates off to this for metadata only restrictions
+
+
+:restriction:
+# see layout.txt for more fleshed out examples of the idea.
+# note, match and pmatch have been reversed namewise.
+.match() # handed package instance, will return bool of whether or not this restriction matches.
+.pmatch() # 'optimized' interface. basically, you hand it the data it tests, rather then a package instance.
+ # basically, for a category restriction (fex), def match(self, p): return self.pmatch(p.category)
+ # get the gist? it holds the actual test, if the restriction can be collapsed down to a test of a single value.
+ # .pmatch calling should only be done by self, or code that does isinstance checks and hands off data itself rather than
+ # doing an instantiation. .pmatch is exposing a bit of the match protocol so that optimizations are potentially possible in
+ # executing match checks (think repo.match).
+.itermatch() # new one, debatable. short version, giving a sequence of package instances, yields true/false for them.
+ # why might this be desirable? if setup of matching is expensive, this gives you a way to amortize the cost.
+ # jason/marius/alec, thoughts?
+ # might have use for glsa set target. define a restriction that limits to installed pkgs, yay/nay if update is avail...
+
+:restrictionSet:
+# mentioning it merely cause it's a grouping (boolean and/or) of individual restrictions
+# an atom, which is in reality a category restriction, package restriction, and/or version restriction is a
+# boolean and set of restrictions
+
+
+:ContentsRestriction:
+# whats this you say? a restriction for searching the vdb's contents db? Perish the thought! ;)
+
+
+:metadataRestriction:
+Mentioning this for the sake of pointing out a subclass of it, DescriptionRestriction- this will be a
+class representing matching against description data. See repo.match and cache.match above.
+The short version is that it encapsulates the description search (a *very* slow search right now) so that
+repo.match can hand off to the cache (delegation), and the cache can do the search itself, however it sees fit.
+
+So... for the default cache, flat_list (19500 ebuilds == 19500 files to read for a full searchDesc), still is slow unless flat_list
+gets some desc. cache added to it internally. If it's a sql based cache, the sql_template should translate the query into
+the appropriate select statement, which should make it *much* faster.
+
+Restating that, delegation is *absolutely* required. There have been requests to add intermediate caches to the tree, or move data
+(whether collapsing metadata.xml or moving data out of ebuilds) so that the form it is stored in is quicker to search.
+These approaches are wrong. Should be clear from above that a repository can, and likely will be remote on some boxes. Such a shift
+of metadata does nothing but make repository implementations that much harder, and shift power away from what knows best how to use it.
+Delegation is a massively more powerful approach, allowing for more extensibility, flexibility and *speed*.
+
+Final restating- searchDesc is matching against cache data. The cache (whether flat_list, anydbm, sqlite, or a remote sql based cache)
+is the *authority* about the fastest way to do searches of its data.
+Programmers get pissed off when users try and tell them how something internally should be implemented- it's fundamentally the same scenario.
+The cache class the user chooses knows how to do its job the best, provide methods of handing control down to it, and let it do its
+job (delegation). Otherwise you've got a backseat driver situation, which doesn't let those in the know, do the deciding (cache knows,
+repo doesn't).
+
+Mind you, not trying to be harsh here. If in reading through the full doc you disagree, question it; if you're after speeding up the current
+cache implementation, note that any such change must be backwards compatible, and not screw up the possibilities of encapsulation/delegation
+this design aims for.
+
+
+:logging:
+flesh this out (define this basically).
+short version, no more writemsg type trickery, use a proper logging framework.
+
+
+[ebuild-daemon.sh]
+Hardcoded paths *have* to go. /usr/lib/portage/bin == kill it.
+Upon initial loadup of ebuild.sh, dump the default/base path down to the daemon, *including* a setting for /usr/lib/portage/bin .
+Likely declare -xr it, then load the actual ebuild*.sh libs.
+Backwards compatibility for that is thus, ebuild.sh defines the var itself in global scope if it's undefined. Semblance of backwards
+compatibility (which is actually somewhat pointless since I'm about to blow it out of the water).
+
+Ebuild-daemon.sh needs a function for dumping a _large_ amount of data into bash, more than just a line or two.
+
+For the ultra paranoid, we load up eclasses, ebuilds, profile.bashrc's into python side, pipe that to gpg for verification, then
+pipe that data straight into bash. No race condition possible for files used/transferred in this manner.
+
+A thought. The screw around speed up hack preload_eclasses added in ebd's heyday of making it as fast as possible would be one route;
+Basically, after verification of an elib/eclass, preload the eclass into a func in the bash env. and declare -r the func after the fork.
+This protects the func from being screwed with, and gives a way to (at least per ebd instance) cache the verified bash code in memory.
+
+It could work surprisingly enough (the preload_eclass command already works), and probably be fairly fast versus the alternative.
+So... the race condition probably can be flat out killed off without massive issues. Still leaves a race for perms on any files/*, but neh.
+A) That stuff shouldn't be executed, B) security is good, but we can't cover every possibility (we can try, but diminishing returns)
+
+A lesser, but still tough version of this is to use the indirection for actual sourcing to get paths instead.
+No EBUILD_PATH, query python side for the path, which returns either '' (which ebd interprets as "err, something is whacked, time to
+scream"), or the actual path.
+
+In terms of timing, gpg verification of ebuilds probably should occur prior to even spawning ebd.sh. profile, eclass, and elib
+sourcing should use this technique to do on the fly verification though.
+Object interaction for that one is going to be *really* fun, as will be mapping config settings to instantiation of objs.
diff --git a/rewrite-misc/layout.txt b/rewrite-misc/layout.txt
new file mode 100644
index 0000000..66a1683
--- /dev/null
+++ b/rewrite-misc/layout.txt
@@ -0,0 +1,273 @@
+===NOTE===
+This was written _way_ the hell back in the past, it may not map to the current iteration of design/goals.
+Some goofy/interesting/good ideas in it, good for historical info.
+
+Rewrite this once the sucker is actually fleshed out.
+"In our next episode of hellsing, we finally bring peace and quiet to our fair london town,
+our final episode...
+Hellfire!
+do do doo da...
+Sweet, baby..."
+===Harring was bored, continue about your business===
+
+specification of subsystems/relevant classes.
+this isn't a concrete "it must be this way", this is a guideline of the various subsystems/important classes,
+and how they interact.
+
+The intention being that you can read this, see the important aspects of portage at a single glance,
+and be aware of where/which subsystems touch up, and how things fit together
+
+sync subsystem/class:
+ #sync'ing method.
+ #potentially limited to a specific tree type. detection/complaints regarding this should be
+ #detected/bailed upon during tree initialization (?)
+ boolean sync()
+
+cachedb subsystem/class:
+ #metadata cache. non structure caching, just metadata caching (for instances where it's
+ #ass slow getting the keys, eg bash based formats)
+ #format agnostic, state the keys stored up front at initialization, and that's what's stored.
+ #this conflicts with portage_db_flat. course that format sucks, so neh.
+ #note, not necessarily required, and is a property of the defined repository/overlay
+ #additionally, note the cache is a db. it doesn't update itself, it just retrieves/stores
+ #data.
+ functions as a dict.
+ __getitem__
+ __setitem__
+ __delitem__
+ get_matches(SearchRestrictions):
+ #assuming the cache can be stated up to date, or never drifts from the tree
+ #or, just using the cache to handle the bulk of the cpv's.
+
+class src_format:
+ #abstract class representing a package to be acted upon.
+ #this is not a cpv, which is just (really) some random string a tree/cache spits
+ #this instance is basically a handler.
+ #note these are high level definitions. individual formats are free to define
+ #what ever phases they want w/in the high level format.
+ get_src_uri
+ #stuff to fetch for this to be used
+ build
+ #high level representation of building a src based package.
+ #note binpkgs just return
+ merge
+ #actual transference/installation to either the livefs or an externally
+ #specified location
+ unmerge
+ #high level representation of any potential commands to execute prior to
+ #removing this format's files. prerm/postrm fex.
+ get_metadata (currently ebuild.py:ebuild_handler.get_keys)
+ #get metadata. *all* available metadata. if the format allows for undefined values,
+ #return None if the metadata is undefined.
+ #
+ #formerly ebuild.py:ebuild_handler.get_keys
+
+class Package:
+ # this is an atomic package returned from a repository. JIT binding of metadata attributes, category/package/name/version
+ # are attributes additionally presuming the underlying format supports it
+
+ unique_id
+ # each derivative *must* define a unique_id that represents that versioned pkg; this is used for caching of instances.
+
+ pkg_id
+ # each derivative *must* define a pkg_id representing the category/package, non versioned (if the repository supports
+ # multiple package/versions/slots- dpkg, fex would define package-version as the key, due to their namespace
+ # layout)
+ # key must be an immutable instance, due to potential for hashing.
+
+ version_cmp(other-instance)
+ # each derivative *must* define this; it's for comparing instances when the key is the same (cpv is the same).
+ # used by the depresolver to determine the 'highest' version package instance.
+ # for dpkg (again, fex), version_cmp would be basically a key compare.
+ # for ebuilds, it would compare slot, then version
+ # returns either -1, 0, or 1 to indicate if the instance (instance.version_cmp) is lt/eq/gt the passed in version
+
+ get_handler
+ # this should be bound to the src repository, via that the src_format- the instance should be able to return
+ # an instance of the src_format handler class, for doing building/installing/etc
+
+ data
+ #attribute, functions as a dict. don't care how it's implemented, whether via descriptors (going that route for JIT),
+ #or a customized dict derivative.
+ #fex, for ebuildPackageInstance, holds (currently)
+ #category, package, version, SLOT, DESCRIPTION, DEPENDS, RDEPENDS, CDEPENDS, PDEPENDS, SRC_URI, KEYWORDS, RESTRICT,
+ #HOMEPAGE, etc. auxdbkeys.
+ #attributes *should* be accessed via lowercase.
+ #
+ #This is debatable.
+ #mostly I just hate the uppercase for auxdbkeys.
+
+ _get_metadata
+ # access the handed in repository, and get metadata
+
+# immediate derivatives
+class ebuildPackageInstance(Package):
+ this is a common derivative of PackageInstance that holds the key attribute setting, and version_cmp logic.
+ whether get_handler is defined here, or required to be handled in derivatives, will be decided at the time of implementation
+ the crystal ball is sometimes a bit foggy, additionally genbot's 8ball functionally seems not too random :/
+
+
+class indexable sequence: #stating it here
+ either can iterate over it ala list, or access it via a hashable object
+ example, using catpkg as key, and versions values
+ for x in instance: == cpv
+ instance[catpkg] == versions of that cp
+
+class protoTree
+ #base tree class. repository configuration file/specification is used in constructing this instance,
+ #namely specification of caching backend (if any), and sync'ing method (if any)
+ #cache isn't directly used by this class, although the update_cache method is a maintenance method
+ #for syncing the cache up with the tree (just as the sync function is used to sync the local tree
+ #to a remote tree)
+
+ #note, not all repositories may have categories, or versions per se.
+ #flat namespaces of packages fex, won't. Probably need a special case then
+ #
+ # yes, special cases suck, but what other approach is there?
+ # we *have* to have a way for external code to inspect/walk the repository... custom attributes
+ # for walking the repository will only make things messier (inspect the instance to figure out how to inspect the instance?
+ # no thank you :)
+ categories = indexable sequence. for x in categories == walk all categories. categories[cat] == get subcategory
+ packages = indexable sequence. for x in packages == all cp. packages[category] == all packages in category
+ versions = indexable sequence. for x in versions == all cpv. versions[cp] == cat/packages versions
+
+
+ search(RestrictionSet)
+ = handed a RestrictionSet, returns PackageInstances that match the passed in Restrictions
+
+ sync = sync this tree. the actual sync method is defined by the repository configuration,
+ and is a sync class instance.
+
+ update_cache = cache update method, for walking the tree and cleansing/updating the cache.
+ if the repository config states update the cache on sync, this is called.
+ separated due to the fact that the cache backend is variable, may not even be used
+ (eg no cache).
+
+ #additionally, the instance itself functions as an indexable sequence for getting *all* packageInstances in this repository.
+ #so... you can either do for x in instance == get *package-cpv* of all cpv, or instance[cpv] to get a specific package-cpv
+
+class gentoo_ebuild_repository
+ # this is a derivative of protoTree that has the usual repository characteristics- categories, versions, packages, etc.
+ __get_categories(self, *optional_base_category)
+ = derivatives overload this to return categories, or if an optional
+ positional arg is specified, the sub-categories- non recursive.
+ returns a tuple of str
+ __get_versions(str package)
+ = derivatives overload this to return versions available for a package
+ returns a tuple of str
+ __get_packages(str category)
+ = derivatives overload this to return packages available for a category
+ returns a tuple of str
+
+ # note our current bintree, and ebuild tree should derive from this.
+ # this is just an intermediate derivative holding common code.
+ # remote binhost fex, would maintain a similar attribute layout, but wouldn't derive from this.
+ # why? cause its method of access is *far* different from the common approach, despite maintaining similar
+ # notions of category/package/versions
+
+search subsystem/classes:
+
+ # These are just the building blocks of other restrictions.
+
+ class StrRestriction(object):
+ # these aren't really Restrictions per se; they're the building blocks of actual Restrictions.
+ match(str or instance that can be coerced to a str)
+
+ class StrExactRestriction(StrRestriction):
+ __init__(value, CaseSensitive=True)
+ # class for seeing if two strings *are the exact same*, after accounting for case sensitivity.
+ match(str or instance that can be coerced to a str)
+
+ class StrSubStringRestriction(StrRestriction):
+ __init__(value, CaseSensitive=True)
+ # class representing matching a substring in the handed in value.
+ match(str or instance that can be coerced to a str)
+
+ class StrRegexRestriction(StrRestriction):
+ __init__(regex, CaseSensitive=True):
+ # take a guess.
+ match(str or instance that can be coerced to a str)
+
+
+ class BaseRestriction(object):
+ # all derivatives *should* be __slots__ based to decrease mem overhead (potentially creating a lot of these
+ # suckers) unless otherwise warranted.
+ # it's minor, but most restrictions are write once- using a new style class w/ __slots__ makes the write once/
+ # error second write behaviour easily doable, and generalized in this base restriction.
+ pmatch(PackageInstance)
+ # general, high level non optimized match.
+ # requires a package instance, derivatives will look in PackageInstance.data to do their matching.
+
+
+ class AtomicRestriction(BaseRestriction):
+ # has the usual pmatch method, plus a direct access method
+ match(value)
+ # direct access matching. This should hold the actual implementation of the matching check.
+ # pmatch should just contain the code for getting at the actual value.
+ # this method exists to allow for code that deals w/ restrictions, to directly hand values to the check
+ #
+ # **NOTE** RestrictionSets lack this method. This method *should* be defined only for atomic Restrictions,
+ # eg, individual checks.
+
+
+ # the first real, package specific restriction.
+ class PackageDataRestriction(AtomicRestriction):
+ __init__(data_name, StrRestriction instance)
+ # data_name can be anything technically.
+ # typically either category, package, version, or one of the auxdbkeys.
+ # an initialized StrRestriction derivative is passed in, and used for matching.
+ match(value):
+ # again, the direct access method. all matching code should be here.
+ pmatch(PackageInstance):
+ # the non-specific route. handed a PackageInstance.
+
+ class VersionRestriction(PackageDataRestriction):
+ # implements version specific restriction rules
+
+ class RestrictionSet(BaseRestriction):
+ # abstraction over a set of individual restrictions.
+ # builds a list of derivatives of Restriction class- note I said *derivatives*
+ # RestrictionSet is a derivative of Restriction.
+ # In other words, you can nest these buggers.
+
+ __init__(*initialRestrictions)
+ #initialRestrictions is optional. basically, all args are grouped into a list, and used as the initial
+ #list of Restrictions (non-finalized)
+ #basically is the equiv of getting the instance, then calling .addRestriction repeatedly
+ #must be a list. a RestrictionSet is a Restriction derivative, so it would be added as is.
+
+ addRestriction(self, Restriction-Instance):
+ #append the passed in restriction to the current chain of restrictions.
+
+ finalize(self):
+ # optimize this chain (if viable/possible).
+ # mostly just convert it from a list to a tuple, and do it recursively
+ # across all held RestrictionSets in this instance.
+
+ pmatch(self, PackageInstance):
+ # run through the checks, returning true/false indicating if this set of restrictions matches, or doesn't.
+
+ class AndRestrictionSet(RestrictionSet):
+ # run through the list of restrictions, and'ing their returns together.
+
+ class OrRestrictionSet(RestrictionSet):
+ # run through the list of restrictions, or'ing their returns together.
+
+ class PackageRestriction(AndRestrictionSet):
+ # this is basically an atom.
+ # for >=dev-util/diffball-0.6_pre4, holds
+ # (PackageDataRestriction('category',StrExactRestriction('dev-util')),
+ # PackageDataRestriction('name',StrExactRestriction('diffball')),
+ # PackageDataRestriction('version',StrExactRestriction('0.6_pre4')))
+ #
+ # basically just a simple, mostly non modifying AndRestrictionSet. It is a special class
+ # such that it can be easily identified by code via isinstance checks, and will serve as the base
+ # for potentially deriving a custom (per format) variation, that has an optimized version of .finalize
+
+ class ConfigRestriction:
+ # restrictions that apply to the system's configuration. eg, a package being built with a certain use flag enabled.
+
+
+class depresolver:
+ #pluggable.
+