From df657c6b468ff94b1b590aba133a3ccaa0395396 Mon Sep 17 00:00:00 2001 From: "Elf M. Sternberg" Date: Fri, 17 Jun 2016 07:53:27 -0700 Subject: [PATCH] Updated readme with a clear explanation of the problem statement. --- README.rst | 42 +++++++++++++++++++++++++++++++---- docs/notes.txt | 49 +++++++++++++++++++++++++++++++++++++++++ polyloader/_python27.py | 27 +++++++++++++++++++++-- 3 files changed, 112 insertions(+), 6 deletions(-) create mode 100644 docs/notes.txt diff --git a/README.rst b/README.rst index defddb6..055cc31 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,41 @@ -**polyloader** is a python module to hook into Python's import machinery -and insert your own syntax parser/recognizer. Importlib uses filename -suffixes to recognize which compiler to use, but is internally -hard-coded to only recognize ".py" as a valid suffix. +Synopsis +-------- + +**Polyloader** is a python module that enables the discovery and loading +of heterogenous source code packages. This discovery and loading is +critical to the functioning of other programming languages that use the +Python AST and Python VM, languages such as Hy, Doge, and Mochi. + +Problem Statement + ----------------- + +The Python module loader system is hard-coded to prevent the discovery +of heterogenous source code packages. From Python 2.6 through the +current (as of this writing) Python 3.5, the import mechanism allowed +for the creation of file finders and importers that would transform +Python's import syntax into a *path,* assert whether or not that path +could be made to correspond to a *syntax object*, and then attempt to +*load* that syntax object as a Python module. Python *packages*, +however, are assumed to be uniformly made up of Python syntax objects, +be they **.py** source files, **.pyc/.pyo** bytecode, or **.so/.dll** +files with an exposed Python-to-C API. 
In Python 2 these suffixes are +hard-coded into the source in the **imp** builtin module; in Python 3 +these suffixes are constants defined in a private section of +**importlib**; in either case, they are unavailable for modification. +This lack of access to the extensions list prevents the *discovery* of +heterogenous source code packages. + +The discovery mechanism is outlined in Python's pkgutil module; features +such as **pkgutil.iter_modules** do not work with heterogenous source +code, which in turn means that one cannot write, for one important +example, Django management commands in an alternative syntax. + +**polyloader** is a python module that intercepts calls to the default +finder, loader, and package module iterator, and if the path resolves to +an alternative syntax, provides the appropriate finder, loader and +iterator. **polyloader** is different from traditional importlib shims +in that it directly affects the root loader, and thus allows for the +discovery and importation of suffixes not listed in Python's defaults. To use: ------- diff --git a/docs/notes.txt b/docs/notes.txt new file mode 100644 index 0000000..47f65b9 --- /dev/null +++ b/docs/notes.txt @@ -0,0 +1,49 @@ +Python 2.7 + +iter_modules (iter_importers) -> + calls iter_importer_modules for each importer in iter_importers + +iter_importers (meta_path, get_importer) -> + returns every importer in sys.meta_path + map(get_importer, sys.path) + +get_importer(path): + + returns a filtered list of sys.path_hooks for importers that can + handle this path; if there is no match, returns ImpImporter(), + which supplies a module iterator (ImpImporter.iter_modules) that + relies on getmodulename. 
+ + * A path_hook is a function of (path -> Maybe importer) + +iter_modules(path, get_importer, prefix) -> + calls iter_importer_modules for each importer returned by path.map(get_importer) + +iter_importer_modules(importer) -> + returns list of (filename, ispkg) for each module understood by the importer + * The method called depends on the class of the importer + * The default is a generic call for "no specific importer" + * For FILES, iter_importer_modules returns a list of files whose + extensions match those in imp.get_suffixes(), which is hard- + coded into the interpreter. + * MEANING: Unless your importer can handle heterogenous module + suffixes, SourceFiles.iter_importer_modules can only find + homogeneous modules. + +This relationship issue holds for Python 2.6 as well. + +Python 3.3 + + The same issue holds, although now most of the extensions have been + moved to importlib._bootstrap. + +It is the relationship between + importlib.machinery.FileFinder +and + _iter_file_finder_modules + +That's killing us. + + + + + \ No newline at end of file diff --git a/polyloader/_python27.py b/polyloader/_python27.py index 3a2668d..c866ce5 100644 --- a/polyloader/_python27.py +++ b/polyloader/_python27.py @@ -46,8 +46,31 @@ class PolyLoader(pkgutil.ImpLoader): sys.modules[fullname] = module return module - -class PolyFinder(pkgutil.ImpImporter): + +# Problem to be solved: pkgutil.iter_modules depends upon +# get_importer, which requires that we use path_hooks, not meta_path. +# This is acceptable (see: https://pymotw.com/2/sys/imports.html), but +# then it depends upon the inspect get_modulename, which in turn is +# dependent upon the __builtin__.imp.get_suffixes(), which excludes +# anything other than the builtin-recognized suffixes. The +# arrangement, as of Python 2.7, excludes heterogenous packages from +# being locatable by pkgutil.iter_modules. 
+# +# iter_modules' use of the simplegeneric protocol makes things even +# harder, as the order in which finders are loaded is not available at +# runtime. +# +# Possible solutions: We provide our own pkgutil, which in turn hacks +# the iter_modules; or we provide our own finder and ensure it gets +# found before the native one. + +# We actually want to have multiple +# SourceFileFinders, each of which either recognizes the file to be + +class PolyFinder(object): + def __init__(self, path = None): + self.path = path + def find_on_path(self, fullname): fls = ["%s/__init__.%s", "%s.%s"] dirpath = "/".join(fullname.split("."))