From d6ea4d2dcf4600c33ce68ea84e3dd0396b852b75 Mon Sep 17 00:00:00 2001 From: "Elf M. Sternberg" Date: Thu, 30 Jun 2016 11:05:28 -0700 Subject: [PATCH] Working editions, plus some blog entries. --- Makefile | 1 + docs/blog1.md | 93 +++++++++++++++ docs/blog2.md | 154 +++++++++++++++++++++++++ docs/notes.html | 51 +++++++++ docs/notes.md | 236 +++++++++++++++++++++++++++++++++++++++ docs/notes.txt | 105 ++++++++++++++++- polyloader/_python2.py | 189 ++++++++++++++++++++----------- tests/test_polyloader.py | 5 +- 8 files changed, 767 insertions(+), 67 deletions(-) create mode 100644 docs/blog1.md create mode 100644 docs/blog2.md create mode 100644 docs/notes.html create mode 100644 docs/notes.md diff --git a/Makefile b/Makefile index 4eed6ea..c69fbfc 100644 --- a/Makefile +++ b/Makefile @@ -27,6 +27,7 @@ help: clean: clean-build clean-pyc clean-test + clean-build: rm -fr build/ rm -fr dist/ diff --git a/docs/blog1.md b/docs/blog1.md new file mode 100644 index 0000000..c34dbf9 --- /dev/null +++ b/docs/blog1.md @@ -0,0 +1,93 @@ +A minor bug in the Hy programming language has led me down a rabbit hole +of Python's internals, and I seem to have absorbed an awful lot of +Python's +[`import`](https://docs.python.org/2.7/reference/simple_stmts.html#import) +semantics. The main problem can best be described this way: In Python, +you call the import function with a string; that string gets translated +in some way into python code. So: what are the *exact* semantics of the +python `import` command? + +Over the next couple of posts, I'll try to accurately describe what it +means when you write: + +``` +import alpha.beta.gamma +from alpha import beta +from alpha.beta import gamma +from .delta import epsilon +``` + +In each case, python is attempting to resolve the collection of dotted +names into a *module object*. + +**module object**: A resource that is or can be compiled into a +meaningful Python *module*. 
This resource could be a file on a filesystem, +a cell in a database, a remote web object, a stream of bytes in an +object store, some content object in a compressed archive, or anything +that can meaningfully be described as an array of bytes (Python 2) or +characters (Python 3). It could even be dynamically generated! + +**module**: The organizational unit of Python code. A namespace +containing Python objects, including classes, functions, submodules, and +immediately invoked code. Modules themselves may be collected into +*packages*. + +**package**: A python module which contains submodules or even +subpackages. The most common packaging scheme is a directory folder; in +this case the folder is a module if it contains an `__init__.py` file, +and it is a *package* if it contains other modules. The name of the +package is the folder name; the name of a submodule would be +`foldername.submodule`. This is called *regular packaging*. An +alternative method, which we will address later, is known as *namespace +packaging*. + +Python has a baroque but generally flexible mechanism for defining how +the dotted name is turned into a *module object*, which it calls *module +finding*, and for how that *module object* is turned into a *code +object* within the current Python session, called *module loading*. + +Python also has a means for *listing* modules. *Listing* is usually +done on a list of paths, using an appropriate means for accessing the +contents at the end of a path. + +The technical definition of a *package* is a module with a `__path__`, a +list of paths that contain submodules for the package. Subpackages get +their own `__path__`. A package can therefore accommodate `.` and `..` +prefixes in submodules, indicating relative paths to sibling modules. A +package can also add to its own `__path__` collection to enable access +to submodules elsewhere.
+ +-- + +The problem I am trying to solve: + +Python *module listing* depends upon a *finder* resolving a *path* to a +container of modules, usually (but not necessarily) a *package*. The +very last finder is the default one: after all alternatives provided by +users have been exhausted, Python reverts to the default behavior of +analyzing the filesystem, as one would expect. The default finder is +hard-coded to use the Python builtin `imp.get_suffixes()` function, +which in turn hard-codes the extensions recognized by the importer. + +If one wants to supply alternative syntaxes for Python and have +heterogeneous packages (for example, packages that contain some modules +ending in `.hy`, and others `.py`, side-by-side)... well, that's just +not possible. + +Yet. + +In the next post, I'll discuss Python's two different *finder* +resolution mechanisms, the *meta_path* and the *path_hook*, and how they +differ, and which one we'll need to instantiate to solve the problem of +heterogeneous Python syntaxes. The actual solution will eventually +involve *eclipsing* Python's default source file handler with one that +enables us to define new source file extensions at run-time, recognize +the source file extensions, and supply the appropriate compilers for +them. + +My hope is that, once solved, this will further enable the development +of Python alternative syntaxes. Folks bemoan the +[explosion of Javascript precompilers](https://github.com/jashkenas/coffeescript/wiki/list-of-languages-that-compile-to-js), +but the truth is that it has in part led to a revival in industrial +programming languages and a renaissance in programming language +development in general.
diff --git a/docs/blog2.md b/docs/blog2.md new file mode 100644 index 0000000..5c4609a --- /dev/null +++ b/docs/blog2.md @@ -0,0 +1,154 @@ +In the last post, I introduced the concepts of the **module object**, +**module**, and **package**, concrete objects that exist within the +Python runtime, as well as some basic ideas about packaging, finding, +and loading. + +In this post, I'll go over the process of *finding*, what it means to +*find* something, and what happens next. + +## A Clarifying point + +I've been very careful to talk about *finding* vs. *loading* +vs. *listing* in this series of posts. There's a reason for that: in +Python 2, the terms "Finder" and "Importer" were used interchangeably, +leading to (at least on my part) massive confusion. In actual fact, +finders, hooks, loaders, and listers are all individual objects, each +with a single, unique method with a specific signature. The method name +is different for each stage, so it is theoretically possible to define a +single class that does all three for a given category of *module +object*, and only in that case, I believe, should we talk about an +"Importer." + +In Python 2.6 and 2.7, the definitive Finder class is called +`pkgutil.ImpImporter`, and the Loader is called `pkgutil.ImpLoader`; +this was a source of much of my confusion. In Python 3, the term +"Importer" is deprecated and "Finder" is used throughout `importlib`. I +will be using "Finder" from now on. + +## Finding + +When the 'import ' command is called, a procedure is +triggered. That procedure then: + +* attempts to *find* a corresponding python *module* +* attempts to *load* that corresponding module into *bytecode* +* Associates the bytecode with the name via sys.modules[fullname] +* Exposes the bytecode to the calling scope. 
+* Optionally: writes the bytecode to the filesystem for future use + +*Finding* is the act of identifying a resource that corresponds to the +import string that can be compiled into a meaningful Python module. The +import string is typically called the *fullname*. + +*Finding* typically involves scanning a collection of *resources* +against a collection of *finders*. *Finding* ends when *finder `A`*, +given *fullname `B`*, reports that a corresponding module can be found +in *resource `C`*, and that the resource can be loaded with *loader +`D`*." + +### MetaFinders + +*Finders* come first, and *MetaFinders* come before all other kinds of +finders. + +_Most finding is done in the context of `sys.path`_; that is, Python's +primary means of organizing Python modules is to have them somewhere on +the local filesystem. This makes sense. Sometimes, however, you want +to get in front of that scan and impose your own logic: you want the +root of an import string to mean something else. Maybe instead of +`directory.file`, you want it to mean `table.row.cell`, or you want it +to mean `website.path,object`, to take +[one terrifying example](http://blog.dowski.com/2008/07/31/customizing-the-python-import-system/). + +That's what you do with a MetaFinder: A MetaFinder may choose to ignore +the entire sys.path mechanism and do something that has nothing to do +with the filesystem, or it may have its own take on what to do with +`sys.path`. + +A Finder is any object with the following method: +``` +[Loader|None] find_module([self|cls], fullname:string, path:[string|None]) +``` + +The find_module method returns None if it cannot find a loader resource +for fullname & path. + +A MetaFinder is placed into the list `sys.meta_path` by whatever code +needs the MetaFinder, and it persists for the duration of the runtime, +unless it is later removed or replaced. Being a list, the search is +ordered; first match wins. 
MetaFinders may be instantiated in any way +the developer desires before being added into `sys.meta_path`. + +### PathHooks and PathFinders + +*PathHooks* are how `sys.path` is scanned to determine which Finder +should be associated with a given directory path. + +A PathHook is a function (or callable): +``` +[Finder|None] (path:string) +``` + +A *PathHook* takes a given directory path and, if the PathHook can +identify a corresponding FileFinder for the modules in that directory +path, returns a constructed instance of that FileFinder; otherwise it +returns None. + +If no `sys.meta_path` finder returns a loader, the full array of +`sys.path ⨯ sys.path_hooks` is compared until a PathHook says it can +handle the path _and_ the corresponding finder says it can handle the +fullname. If no match happens, Python's default FileFinder class is +instantiated with the path. + +This means that for each path in `sys.path`, the list of +`sys.path_hooks` is scanned; the first function to return an importer is +handed responsibility for that path; if no function returns, the default +FileFinder is returned; the default FileFinder returns only the default +SourceFileLoader which (if you read to the end of +[part one](http://elfsternberg.com)) blocks our path toward +heterogeneous packages. + +PathHooks are placed into the list `sys.path_hooks`; like +`sys.meta_path`, the list is ordered and first one wins. + +### The Takeaway + +There's some confusion over the difference between the two objects, so +let's clarify one last time. + + Use a **meta_finder** (A Finder in +`sys.meta_path`) when you want to redefine the meaning of the import +string so it can search alternative paths that may have no reference to +a filesystem path found in `sys.path`; an import string could be +redefined as a location in an archive, an RDF triple of +document/tag/content, or table/row_id/cell, or be interpreted as a URL +to a remote resource.
+ + Use a **path_hook** (A function in +`sys.path_hooks` that returns a FileFinder) when you want to +re-interpret the meaning of an import string that refers to a module +object on or accessible by `sys.path`; PathHooks are important when you +want to add directories to sys.path that contain something _other than_ +`.py`, `.pyc/.pyo`, and `.so` modules conforming to the Python ABI. + + A *MetaFinder* is typically constructed when +it is added to `sys.meta_path`; a *PathHook* instantiates a *FileFinder* +when the PathHook function lays claim to the path. The developer +instantiates a MetaFinder before adding it to `sys.meta_path`; it's the +PathHook function that instantiates a FileFinder. + +## Next + +Note that PathHooks are for paths containing something _other than_ the +traditional (and hard-coded) source file extensions. The purpose of a +heterogeneous source file finder and loader is to enable finding in +directories within `sys.path` that contain other source files syntaxes +_alongside_ those traditional sources. I need to *eclipse* (that is, +get in front of) the default FileFinder with one that understands more +suffixes than those listed in either `imp.get_suffixes()` (Python 2) or +`importlib._bootstrap.SOURCE_SUFFIXES` (Python 3). I need one that will +return the Python default loader if it encounters the Python default +suffixes, but will invoke *our own* source file loader when encountering +one of our suffixes. + +We'll talk about loading next. diff --git a/docs/notes.html b/docs/notes.html new file mode 100644 index 0000000..eb1d6a5 --- /dev/null +++ b/docs/notes.html @@ -0,0 +1,51 @@ +

Python IMPORT

+

What is the exact syntax of the python import command?

+

Clarifying terminology

+

The language used for describing the import mechanism is confusing, often horribly so. Let's go with some clarification first.

+

When the 'import ' command is called, a procedure is triggered. That procedure then:

+
    +
  • attempts to find a corresponding python module
  • +
  • attempts to load that corresponding module into bytecode
  • +
  • Associates the bytecode with the name via sys.modules[fullname]
  • +
  • Exposes the bytecode to the calling scope.
  • +
+

Only the first three matter for our purposes.

+

FINDING

+

Finding is the act of identifying a resource that can be compiled into a meaningful Python module. This resource could be a file on a filesystem, a cell in a database, a remote web object, a stream of bytes in an object store, some content object in a compressed archive, or anything that can meaningfully be described as an array of bytes. It could even be dynamically generated in some way.

+

Finding typically involves scanning a collection of resources against a collection of finders. Finding ends when "finder A, given fullname B, reports that a corresponding module can be found in resource C, and that the resource can be loaded with loader D."

+

METAFINDERS

+

Finders come first, and MetaFinders come before all other kinds of finders.

+

Most finding is done in the context of sys.path; that is, Python's primary means of organizing Python modules is to have them somewhere on the local filesystem. Sometimes, however, you want to get in front of that scan. That's what you do with a MetaFinder: A MetaFinder may have its own take on what to do with sys.path; it may choose to ignore sys.path entirely and do something with the import fullname that has nothing to do with the local filesystem.

+

A Finder is any object with the following function: [Loader|None] find_module([self|cls], fullname:string, path:[string|None])

+

find_module returns None if it cannot find a loader resource for fullname & path.

+

MetaFinders are placed into the list sys.meta_path by whatever code needs a MetaFinder, and persist for the duration of the runtime provided they're not removed or replaced. Being a list, the search is ordered and first one wins. MetaFinders may be instantiated in any way the developer desires.

+

PATH_HOOK

+

PathHooks are how sys.path is scanned to determine which Finder should be associated with a given directory path.

+

A PathHook is a function: [Finder|None] (path:string)

+

A PathHook is a function that takes a given directory path and, if the PathHook can identify a corresponding Finder for the modules in that directory path, returns the Finder, otherwise it returns None.

+

If no sys.meta_path finder returns a loader, the full array of sys.path ⨯ sys.path_hooks is compared until a path_hook says it can handle the path and the corresponding finder says it can handle the fullname. If no match happens, Python's default import behavior is triggered.

+

PathHooks are placed into the list sys.path_hooks; like sys.meta_path, the list is ordered and first one wins.

+

LOADER

+

Loaders are returned by Finders, and are constructed by Finders with whatever resources the developer specifies the Finder has and can provide.

+

a collection of finders the fullname (the dot-separated string passed to the import function).

+

to find a corresponding python module, which is then compiled into Python bytecode and incorporated into the python runtime, where it will be accessible to the importing function or modules

+

MetaFinder: A python object with a single method:

+
(Loader|None) find_module(self, fullname:string, path:(string|None))
+

Python 2.7

+

iter_modules (iter_importers) -> calls iter_importer_modules for each importer in iter_importers

+

iter_importers (meta_path, get_importer) -> returns every importer in sys.meta_path + map(get_importer, sys.path)

+

get_importer(path):

+
returns a filtered list of sys.path_hooks for importers that can
+handle this path; if there is no match, returns ImpImporter(),
+which supplies a module iterator (ImpImporter.iter_modules) that
+relies on getmodulename.  
+
+* A path_hook is a function of (path -> Maybe importer)
+

iter_modules(path, get_importer, prefix) -> calls iter_importer_modules for each importer returned by path.map(get_importer)

+

iter_importer_modules(importer) -> returns list of (filename, ispkg) for each module understood by the importer * The method called depends on the class of the importer * The default is a generic call for "no specific importer" * For FILES, iter_importer_modules returns a list of files whose extensions match those in imp.get_suffixes(), which is hard-coded into the interpreter. * MEANING: Unless your importer can handle heterogeneous module suffixes, SourceFiles.iter_importer_modules can only find homogeneous modules.

+

This relationship issue holds for Python 2.6 as well.

+

Python 3.3

+
The same issue holds, although now most of the extensions have been
+moved to importlib._bootstrap.
+

It is the relationship between importlib.machinery.FileFinder and _iter_file_finder_modules

+

That's killing us.

diff --git a/docs/notes.md b/docs/notes.md new file mode 100644 index 0000000..2257d5f --- /dev/null +++ b/docs/notes.md @@ -0,0 +1,236 @@ +# Python IMPORT + +What is the *exact* syntax of the python `import` command? What does it +mean when you write: + +``` +import alpha.beta.gamma +from alpha import beta +from alpha.beta import gamma +from .delta import epsilon +``` + +In each case, python is attempting to resolve the collection of dotted +names into a *module object*. + +**module object**: A resource that is or can be compiled into a +meaningful Python *module*. This resource could a file on a filesystem, +a cell in a database, a remote web object, a stream of bytes in an +object store, some content object in a compressed archive, or anything +that can meaningfully be said described as an array of bytes. It could +even be dynamically generated! + +**module**: The organizational unit of Python code. A namespace +containing Python objects, including classes, functions, submodules, and +immediately invoked code. Modules themselves may be collected into +*packages*. + +**package**: A python module which contains submodules or even +subpackages. The most common packaging scheme is a directory folder; in +this case the folder is a module if it contains an `__init__.py` file, +and it is a *package* if it contains other modules. The name of the +package is the folder name; the name of a submodule would be +`foldername.submodule`. This is called *regular packaging*. An +alternative method is known as *namespace packaging*. + +Python has a baroque but generally flexible mechanism for defining how +the dotted name is turned into a *module object*, which it calls *module +finding*, and for how that *module object* is turned into a *code +object* within the current Python session, called *module loading*. + +Python also has a means for *listing* modules. 
*Listing* is usually +done on a list of paths, using an appropriate means for accessing the +contents at the end of a path. + +The technical definition of a *package* is a module with a `__path__`, a +list of paths that contain submodules for the package. Subpackages get +their own` __path__`. A package can therefore accomodate `.` and `..` +prefixes in submodules, indicating relative paths to sibling modules. A +package can also and to its own `__path__` collection to enable access +to submodules elsewhere. + +# Clarifying terminology + +The language used for describing the import mechanism is confusing, +often horribly so. Let's go with some clarification first. + +When the 'import ' command is called, a procedure is +triggered. That procedure then: + +* attempts to *find* a corresponding python *module* +* attempts to *load* that corresponding module into *bytecode* +* Associates the bytecode with the name via sys.modules[fullname] +* Exposes the bytecode to the calling scope. + +Only the first three matter for our purposes. + +## FINDING + +*Finding* is the act of identifying a resource that corresponds to the +import string and can be compiled into a meaningful Python module. The +import string is typically called the *fullname*. + +*Finding* typically involves scanning a collection of *resources* +against a collection of *finders*. *Finding* ends when *finder `A`*, +given *fullname `B`*, reports that a corresponding module can be found +in *resource `C`*, and that the resource can be loaded with *loader +`D`*." + +### METAFINDERS + +*Finders* come first, and *MetaFinders* come before all other kinds of +finders. + +_Most finding is done in the context of `sys.path`_; that is, Python's +primary means of organizing Python modules is to have them somewhere on +the local filesystem, which makes sense. Sometimes, however, you want +to get in front of that scan. 
That's what you do with a MetaFinder: A +MetaFinder may have its own take on what to do with `sys.path`; it may +choose to ignore `sys.path` entirely and do something with the import +*fullname* that has nothing to do with the local filesystem. + +A Finder is any object with the following function: + [Loader|None] find_module([self|cls], fullname:string, path:[string|None]) + +If find_module returns None if it cannot find a loader resource for +fullname & path. + +A MetaFinder is placed into the list `sys.meta_path` by whatever code +needs the MetaFinder, and it persists for the duration of the runtime, +unless it is later removed or replaced. Being a list, the search is +ordered; first match wins. MetaFinders may be instantiated in any way +the developer desires before being added into `sys.meta_path`. + +### PATH_HOOK + +*PathHooks* are how `sys.path` is scanned to determine the which Finder +should be associated with a given directory path. + +A *PathHook* is a function: + [Finder|None] (path:string) + +A *PathHook* is a function that takes a given directory path and, if the +PathHook can identify a corresponding Finder for the modules in that +directory path, returns the Finder, otherwise it returns None. + +If no `sys.meta_path` finder returns a loader, the full array of +`sys.paths ⨯ sys.path_hooks` is compared until a PathHook says it can +handle the path and the corresponding finder says it can handle the +fullname. If no match happens, Python's default import behavior is +triggered. + +PathHooks are placed into the list `sys.path_hooks`; like +`sys.meta_path`, the list is ordered and first one wins. + +### LOADER + +*Loaders* are returned by *Finders*, and are constructed by Finders with +whatever resources the developer specifies the Finder has and can +provide. 
The Loader is responsible for pulling the content of the +*module object* into Python's memory and processing it into a *module*, +whether by calling Python's `eval()/compile()` functions on standard +Python code, or by some other means. + + + +a collection of *finders* the *fullname* (the dot-separated string passed to the `import` +function). + + + +to find a +corresponding python module, which is then compiled into Python bytecode +and incorporated into the python runtime, where it will be accessible to +the importing function or modules + +MetaFinder: A python object with a single method: + + (Loader|None) find_module(self, fullname:string, path:(string|None)) + + + + + +Python 2.7 + +iter_modules (iter_importers) -> + calls iter_importer_modules for each importer in iter_importers + +iter_importers (meta_path, get_importer) -> + returns every importer in sys.meta_path + map(get_importer, sys.path) + +get_importer(path): + + returns a filtered list of sys.path_hooks for importers that can + handle this path; if there is no match, returns ImpImporter(), + which supplies a module iterator (ImpImporter.iter_modules) that + relies on getmodulename. + + * A path_hook is a function of (path -> Maybe importer) + +iter_modules(path, get_importer, prefix) -> + calls iter_importer_modules for each importer returned by path.map(get_importer) + +iter_importer_modules(importer) -> + returns list of (filename, ispkg) for each module understood by the importer + * The method called depends on the class of the importer + * The default is a generic call for "no specific importer" + * For FILES, iter_import_modules returns a list of files whose + extensions match those in imp.get_suffixes(), which is hard- + coded into the interpreter. + * MEANING: Unless your importer can handle heterogenous module + suffixes, SourceFiles.iter_importer_modules can only find + homogeonous modules. + +This relationship issue holds for Python 2.6 as well. 
+ +Python 3.3 + + The same issue holds, although now most of the extensions have been + moved to importlib._bootstrap. + +It is the relationship between + importlib.machinery.FileFinder +and + _iter_file_finder_modules + +That's killing us. + + + +--- + +So the ONLY thing I have to do, according to Python, is assert that +there's a dir/__init__.suff and attempt to load it! If I do that, I can +make it work? + +No: The search for __init__.suff is only the first + + +--- + +test_import: test_with_extension "py" and "my" +test_execute_bit_not_set (on Posix system, .pyc files got their +executable bit set if the .py file had it set; it looks as if Python +just copied the permissions, if it had permission to do so. We should +follow the example of 2.7 & 3.4, and NOT set +x if we can help it). + +test_rewrite_pyc_with_read_only_source (on Posix systems, if the .py +file had read-only set, the .pyc file would too, making updates +problematic). + +test_import_name_binding + +test_bug7732 (attempt to import a '.my' file that's not a file) + + + +These are more Hy-related: + +test_module_with_large_stack (see python example) + +test_failing_import_sticks + +test_failing_reload + + diff --git a/docs/notes.txt b/docs/notes.txt index 47f65b9..ad4c919 100644 --- a/docs/notes.txt +++ b/docs/notes.txt @@ -1,3 +1,106 @@ +Clarifying terminology + +The language used for describing the import mechanism is confusing, +often horribly so. Let's go with some clarification first. + +When the 'import ' command is called, a procedure is +triggered. That procedure then: + +* attempts to *find* a corresponding python *module* +* attempts to *load* that corresponding module into *bytecode* +* Associates the bytecode with the name via sys.modules[fullname] +* Exposes the bytecode to the calling scope. + +Only the first three matter for our purposes. + +## FINDING + +*Finding* is the act of identifying the a resource that can be compiled +into a meaningful Python module. 
This resource could a file on a +filesystem, a cell in a database, a remote web object, a stream of bytes +in an object store, some content object in a compressed archive, or +anything that can meaningfully be said described as an array of bytes. +It could even be dynamically generated in some way. + +*Finding* typically involves scanning a collection of *resources* +against a collection of *finders*. *Finding* ends when "*finder __A__*, +given *fullname __B__*, reports that a corresponding module can be found +in *resource __C__*, and that the resource can be loaded with *loader +__D__*." + +### METAFINDERS + +*Finders* come first, and *MetaFinders* come before all other kinds of +finders. + +_Most finding is done in the context of `sys.path`_; that is, Python's +primary means of organizing Python modules is to have them somewhere on +the local filesystem. Sometimes, however, you want to get in front of +that scan. That's what you do with a MetaFinder: A MetaFinder may have +its own take on what to do with `sys.path`; it may choose to ignore +`sys.path` entirely and do something with the import *fullname* that has +nothing to do with the local filesystem. + +A Finder is any object with the following function: + [Loader|None] find_module([self|cls], fullname:string, path:[string|None]) + +If find_module returns None if it cannot find a loader resource for +fullname & path. + +MetaFinders are placed into the list `sys.meta_path` by whatever code +needs a MetaFinder, and persist for the duration of the runtime provided +they're not removed or replaced. Being a list, the search is ordered +and first one wins. MetaFinders may be instantiated in any way the +developer desires. + +### PATH_HOOK + +*PathHooks* are how `sys.path` is scanned to determine the +which Finder should be associated with a given directory path. 
+ +A *PathHook* is a function: + [Finder|None] (path:string) + +A *PathHook* is a function that takes a given directory path and, if the +PathHook can identify a corresponding Finder for the modules in that +directory path, returns the Finder, otherwise it returns None. + +If no `sys.meta_path` finder returns a loader, the full array of +`sys.paths ⨯ sys.path_hooks` is compared until a path_hook says it can +handle the path and the corresponding finder says it can handle the +fullname. If no match happens, Python's default import behavior is +triggered. + + +PathHooks are placed into the list `sys.path_hooks`; like +`sys.meta_path`, the list is ordered and first one wins. + +### LOADER + +*Loaders* are returned by *Finders*, and are constructed by Finders with +whatever resources the developer specifies the Finder has and can +provide. + + + +a collection of *finders* the *fullname* (the dot-separated string passed to the `import` +function). + + + +to find a +corresponding python module, which is then compiled into Python bytecode +and incorporated into the python runtime, where it will be accessible to +the importing function or modules + +MetaFinder: A python object with a single method: + + (Loader|None) find_module(self, fullname:string, path:(string|None)) + + + + + Python 2.7 iter_modules (iter_importers) -> @@ -46,4 +149,4 @@ That's killing us. 
- \ No newline at end of file + diff --git a/polyloader/_python2.py b/polyloader/_python2.py index 6f34d0a..bee8879 100644 --- a/polyloader/_python2.py +++ b/polyloader/_python2.py @@ -1,104 +1,167 @@ +import io import os import os.path +import stat import sys +import imp import pkgutil -class PolyLoader(pkgutil.ImpLoader): +class PolyLoader(): _loader_handlers = [] _installed = False - def is_package(self, fullname): - dirpath = "/".join(fullname.split(".")) - for pth in sys.path: - pth = os.path.abspath(pth) - for (compiler, suffix) in self._loader_handlers: - composed_path = "%s/%s/__init__.%s" % (pth, dirpath, suffix) - if os.path.exists(composed_path): - return True - return False + def __init__(self, fullname, path, is_pkg): + self.fullname = fullname + self.path = path + self.is_package = is_pkg + @classmethod + def _install(cls, compiler, suffixes): + if isinstance(suffixes, basestring): + suffixes = [suffixes] + suffixes = set(suffixes) + overlap = suffixes.intersection(set([suf[0] for suf in imp.get_suffixes()])) + if overlap: + raise RuntimeError("Override of native Python extensions is not permitted.") + overlap = suffixes.intersection( + set([suffix for (compiler, suffix) in cls._loader_handlers])) + if overlap: + raise RuntimeWarning( + "Insertion of %s overrides already installed compiler." % + ', '.join(list(overlap))) + cls._loader_handlers += [(compiler, suf) for suf in suffixes] def load_module(self, fullname): if fullname in sys.modules: return sys.modules[fullname] - + + if fullname != self.fullname: + raise ImportError("Load confusion: %s vs %s." % (fullname, self.fullname)) + matches = [(compiler, suffix) for (compiler, suffix) in self._loader_handlers - if path.endswith(suffix)] + if self.path.endswith(suffix)] + if matches.length == 0: - raise ImportError("%s is not a recognized module?" % name) + raise ImportError("%s is not a recognized module?" 
% fullname) if matches.length > 1: raise ImportError("Multiple possible resolutions for %s: %s" % ( - name, ', '.join([suffix for (compiler, suffix) in matches]))) + fullname, ', '.join([suffix for (compiler, suffix) in matches]))) compiler = matches[0] - module = compiler(name, path) - module.__file__ = self.filename - module.__name__ = self.fullname + with io.FileIO(self.path, 'r') as file: + source_text = file.read() - if self.is_package(fullname): - module.__path__ = self.path_entry + module = compiler(source_text, fullname, self.path) + module.__file__ = self.path + module.__name__ = self.fullname + module.__package__ = '.'.join(fullname.split('.')[:-1]) + + if self.is_package: + module.__path__ = [os.path.dirname(self.path)] module.__package__ = fullname - else: - module.__package__ = '.'.join(fullname.split('.')[:-1]) sys.modules[fullname] = module return module -# Problem to be solved: pkgutil.iter_modules depends upon -# get_importer, which requires that we uses path_hooks, not meta_path. -# This is acceptable (see: https://pymotw.com/2/sys/imports.html), but -# then it depends upon the inspect get_modulename, which in turn is -# dependent upon the __builtin__.imp.get_suffixes(), which excludes -# anything other than the builtin-recognizes suffixes. The -# arrangement, as of Python 2.7, excludes heterogenous packages from -# being locatable by pkgutil.iter_modules. -# -# iter_modules use of the simplegeneric protocol makes things even -# harder, as the order in which finders are loaded is not available at -# runtime. -# -# Possible solutions: We provide our own pkgutils, which in turn hacks -# the iter_modules; or we provide our own finder and ensure it gets -# found before the native one. +# PolyFinder is an implementation of the Finder class from Python 2.7, +# with embellishments gleefully copied from Python 3.4. It supports +# all the same functionality for non-.py sourcefiles with the added +# benefit of falling back to Python's default behavior. 
-# Why the heck python 2.6 insists on calling finders "importers" is -# beyond me. At least in calls loaders "loaders". +# Polyfinder is instantiated by _polyloader_pathhook() class PolyFinder(object): - def __init__(self, path = None): - self.path = path - + def __init__(self, path=None): + self.path = path or '.' + def _pl_find_on_path(self, fullname, path=None): - subname = fullname.split(".")[-1] - if subname != fullname and self.path is None: + splitname = fullname.split(".") + if self.path is None and splitname[-1] != fullname: return None - # As in the original, we ignore the 'path' argument - path = None - if self.path is not None: - path = [os.path.realpath(self.path)] + + dirpath = "/".join(splitname) + path = [os.path.realpath(self.path)] - fls = ["%s/__init__.%s", "%s.%s"] - for fp in fls: + fls = [("%s/__init__.%s", True), ("%s.%s", False)] + for (fp, ispkg) in fls: for (compiler, suffix) in PolyLoader._loader_handlers: - composed_path = fp % ("%s/%s" % (pth, dirpath), suffix) - if os.path.exists(composed_path): - return PolyLoader(composed_path) + composed_path = fp % ("%s/%s" % (path, dirpath), suffix) + if os.path.isfile(composed_path): + return PolyLoader(fullname, composed_path, ispkg) + + # Fall back onto Python's own methods. 
try: - file, filename, etc = imp.find_module(subname, path) + file, filename, etc = imp.find_module(fullname, path) except ImportError: return None - return ImpLoader(fullname, file, filename, etc) - + return pkgutil.ImpLoader(fullname, file, filename, etc) + def find_module(self, fullname, path=None): - path = self._pl_find_on_path(fullname) - if path: - return PolyLoader(path) - return None + return self._pl_find_on_path(fullname) -def _install(compiler, suffixes): + @staticmethod + def getmodulename(path): + filename = os.path.basename(path) + suffixes = ([(-len(suf[0]), suf[0]) for suf in imp.get_suffixes()] + + [(-len(suf[1]), suf[1]) for suf in PolyLoader.loader_handlers]) + suffixes.sort() + for neglen, suffix in suffixes: + if filename[neglen:] == suffix: + return (filename[:neglen], suffix) + + def iter_modules(self, prefix=''): + if self.path is None or not os.path.isdir(self.path): + return -sys.meta_path.insert(0, MetaImporter()) -iter_importer_modules.register(MetaImporter, meta_iterate_modules) + yielded = {} + + try: + filenames = os.listdir(self.path) + except OSError: + # ignore unreadable directories like import does + filenames = [] + filenames.sort() + for fn in filenames: + modname = self.getmodulename(fn) + if modname=='__init__' or modname in yielded: + continue + + path = os.path.join(self.path, fn) + ispkg = False + + if not modname and os.path.isdir(path) and '.' not in fn: + modname = fn + try: + dircontents = os.listdir(path) + except OSError: + # ignore unreadable directories like import does + dircontents = [] + for fn in dircontents: + subname = self.getmodulename(fn) + if subname=='__init__': + ispkg = True + break + else: + continue # not a package + + if modname and '.' 
not in modname: + yielded[modname] = 1 + yield prefix + modname, ispkg + + + +def _polyloader_pathhook(path): + if not os.path.isdir(path): + raise ImportError('Only directories are supported', path = path) + return PolyFinder(path) + + +def install(compiler, suffixes): + if not PolyLoader._installed: + sys.path_hooks.append(_polyloader_pathhook) + PolyLoader._installed = True + PolyLoader._install(compiler, suffixes) + diff --git a/tests/test_polyloader.py b/tests/test_polyloader.py index 82cf168..5942db9 100644 --- a/tests/test_polyloader.py +++ b/tests/test_polyloader.py @@ -17,9 +17,8 @@ from polyloader import polyloader # correct compiler has been found for a given extension. def compiler(pt): - def _compiler(source_path, modulename): - with open(source_path, "r") as file: - return compile("result='Success for %s: %s'" % (pt, file.readline().rstrip()), modulename, "exec") + def _compiler(source_text, modulename): + return compile("result='Success for %s: %s'" % (pt, source_text.rstrip()), modulename, "exec") return _compiler class Test_Polymorph_1(object):