git-linter/git_lint/git_lint.py

463 lines
17 KiB
Python
Raw Normal View History

from functools import reduce
from collections import namedtuple
2016-09-23 22:27:15 +00:00
import getopt
import gettext
2016-09-23 20:11:06 +00:00
import operator
2016-09-23 22:27:15 +00:00
import os
2016-09-29 19:38:23 +00:00
import shutil
2016-09-23 20:11:06 +00:00
import re
2016-09-23 22:27:15 +00:00
import subprocess
2016-09-23 20:11:06 +00:00
import sys
2016-09-26 18:44:31 +00:00
import pprint
try:
import configparser
except ImportError as e:
import ConfigParser as configparser
2016-09-23 22:27:15 +00:00
2016-09-23 20:11:06 +00:00
_ = gettext.gettext
2016-09-26 22:51:31 +00:00
2016-09-26 21:58:43 +00:00
# ___ __ _ ___ _
# / __|___ _ _ / _(_)__ _ | _ \___ __ _ __| |___ _ _
# | (__/ _ \ ' \| _| / _` | | / -_) _` / _` / -_) '_|
2016-09-26 21:58:43 +00:00
# \___\___/_||_|_| |_\__, | |_|_\___\__,_\__,_\___|_|
# |___/
# (commandLineDictionary, repositoryLocation) -> (configurationDictionary | exit)
2016-09-28 21:26:06 +00:00
def load_config(options, base):
"""Loads the git-lint configuration file.
Returns the configuration file as a dictionary of dictionaries.
Performs substitutions as specified in the SafeConfigParser
specification; the only one performed currently is the 'repodir'
will be replaced with the base directory of the repository.
Combined with the option to specify the .git-lint configuration as
a directory, this allows users to keep per-project configuration
files for specific linters.
"""
2016-10-05 21:58:07 +00:00
def find_config_file(options, base):
""" Returns the configuration file from a prioritized list of locations.
Locations are prioritized as:
1. From the command line. Fail if specified but not found
2. The repository's root directory, as the file .git-lint
3. The repository's root directory, as the file .git-lint/config
4. The user's home directory, as file .git-lint
5. The user's home directory, as the file .git-lint/config
If no configuration file is found, this is an error.
"""
if 'config' in options:
config = options['config']
configpath = os.path.abspath(config)
if not os.path.isfile(configpath):
sys.exit(_('Configuration file not found: {}\n').format(config))
return configpath
home = os.environ.get('HOME', None)
possibles = [os.path.join(base, '.git-lint'),
os.path.join(base, '.git-lint/config')] + ((home and [
os.path.join(home, '.git-lint'),
os.path.join(home, '.git-lint/config')]) or [])
matches = [p for p in possibles if os.path.isfile(p)]
if len(matches) == 0:
sys.exit(_('No configuration file found, tried: {}').format(':'.join(possibles)))
return matches[0]
Linter = namedtuple('Linter', ['name', 'linter'])
path = find_config_file(options, base)
configloader = configparser.SafeConfigParser()
configloader.read(path)
configloader.set('DEFAULT', 'repodir', base)
return [Linter(section, {k: v for (k, v) in configloader.items(section)})
for section in configloader.sections()]
2016-09-26 21:58:43 +00:00
# ___ _ _
# / __(_) |_
# | (_ | | _|
# \___|_|\__|
2016-09-26 21:58:43 +00:00
#
2016-09-23 20:11:06 +00:00
def get_git_response_raw(cmd):
fullcmd = (['git'] + cmd)
process = subprocess.Popen(fullcmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True)
2016-09-23 22:27:15 +00:00
(out, err) = process.communicate()
return (out, err, process.returncode)
2016-09-23 20:11:06 +00:00
def get_git_response(cmd):
2016-09-23 22:27:15 +00:00
(out, error, returncode) = get_git_response_raw(cmd)
return out
2016-09-23 20:11:06 +00:00
def split_git_response(cmd):
2016-09-23 22:27:15 +00:00
(out, error, returncode) = get_git_response_raw(cmd)
return out.splitlines()
2016-09-23 20:11:06 +00:00
def run_git_command(cmd):
2016-09-23 22:27:15 +00:00
fullcmd = (['git'] + cmd)
return subprocess.call(fullcmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
2016-09-26 21:58:43 +00:00
universal_newlines=True)
2016-09-23 20:11:06 +00:00
def get_shell_response(fullcmd):
process = subprocess.Popen(fullcmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
shell=True,
universal_newlines=True)
2016-09-23 22:27:15 +00:00
(out, err) = process.communicate()
return (out, err, process.returncode)
2016-09-23 20:11:06 +00:00
2016-09-23 22:27:15 +00:00
def get_git_base():
(out, error, returncode) = get_git_response_raw(
['rev-parse', '--show-toplevel'])
return (returncode == 0 and out.rstrip()) or None
2016-09-23 20:11:06 +00:00
2016-09-23 22:27:15 +00:00
def get_git_head():
empty_repository_hash = '4b825dc642cb6eb9a060e54bf8d69288fbee4904'
(out, err, returncode) = get_git_response_raw(
['rev-parse', '--verify HEAD'])
return ((err and empty_repository_hash) or 'HEAD')
2016-09-23 20:11:06 +00:00
2016-09-23 22:27:15 +00:00
git_base = get_git_base()
git_head = get_git_head()
2016-09-23 20:11:06 +00:00
2016-09-26 21:58:43 +00:00
# _ _ _ _ _ _ _ _
# | | | | |_(_) (_) |_(_)___ ___
# | |_| | _| | | | _| / -_|_-<
# \___/ \__|_|_|_|\__|_\___/__/
2016-09-26 21:58:43 +00:00
#
2016-09-23 20:11:06 +00:00
2016-09-28 21:26:06 +00:00
class MatchFilter:
2016-09-28 21:26:06 +00:00
def __init__(self, config):
self.matcher = self.make_match_filter_matcher([v.linter.get('match', '') for v in config])
2016-09-23 20:11:06 +00:00
2016-09-28 21:26:06 +00:00
def __call__(self, path):
return self.matcher.search(path)
2016-09-28 21:26:06 +00:00
@staticmethod
def make_match_filter_matcher(extensions):
trimmed = [s.strip() for s in reduce(operator.add,
[ex.split(',') for ex in extensions], [])]
cleaned = [re.sub(r'^\.', '', s) for s in trimmed]
return re.compile(r'\.' + '|'.join(cleaned) + r'$')
2016-09-23 20:11:06 +00:00
2016-09-26 21:58:43 +00:00
# ___ _ _ _ _ _
# / __| |_ ___ __| |__ | (_)_ _| |_ ___ _ _ ___
# | (__| ' \/ -_) _| / / | | | ' \ _/ -_) '_(_-<
# \___|_||_\___\__|_\_\ |_|_|_||_\__\___|_| /__/
2016-09-26 21:58:43 +00:00
#
2016-09-23 20:11:06 +00:00
def executable_exists(script, label):
2016-09-23 22:27:15 +00:00
if not len(script):
sys.exit(
_('Syntax error in command configuration for {} ').format(label))
2016-09-23 20:11:06 +00:00
2016-09-23 22:27:15 +00:00
scriptname = script.split(' ').pop(0)
if not len(scriptname):
sys.exit(
_('Syntax error in command configuration for {} ').format(label))
2016-09-23 22:27:15 +00:00
def is_executable(path):
2016-09-23 22:27:15 +00:00
return os.path.exists(path) and os.access(path, os.X_OK)
if scriptname.startswith('/'):
return (is_executable(scriptname) and scriptname) or None
2016-09-23 20:11:06 +00:00
2016-09-30 01:36:27 +00:00
# shutil.which() doesn't appear until Python 3, darnit.
possibles = [path for path in
[os.path.join(path, scriptname)
for path in os.environ.get('PATH').split(':')]
if is_executable(path)]
return (len(possibles) and possibles.pop(0)) or False
2016-09-23 22:27:15 +00:00
2016-09-26 21:58:43 +00:00
def get_linter_status(config):
2016-10-05 21:58:07 +00:00
def get_working_linter_names(config):
return [i.name for i in config
if executable_exists(i.linter['command'], i.name)]
2016-09-26 21:58:43 +00:00
working_linter_names = get_working_linter_names(config)
broken_linter_names = (set([i.name for i in config]) - set(working_linter_names))
return working_linter_names, broken_linter_names
# ___ _ _ _ _ __ __ _ _
# / __|___| |_ | (_)__| |_ ___ / _| / _(_) |___ ___
# | (_ / -_) _| | | (_-< _| / _ \ _| | _| | / -_|_-<
# \___\___|\__| |_|_/__/\__| \___/_| |_| |_|_\___/__/
2016-09-26 21:58:43 +00:00
#
2016-09-23 20:11:06 +00:00
2016-09-28 21:26:06 +00:00
def get_filelist(options, extras):
""" Returns the list of files against which we'll run the linters. """
2016-09-23 20:11:06 +00:00
def base_file_filter(files):
""" Return the full path for all files """
return [os.path.join(git_base, file) for file in files]
2016-09-23 20:11:06 +00:00
def cwd_file_filter(filenames):
""" Return the full path for only those files in the cwd and down """
if os.path.samefile(os.getcwd(), git_base):
return base_file_filter(filenames)
gitcwd = os.path.join(os.path.relpath(os.getcwd(), git_base), '')
return base_file_filter([filename for filename in filenames
if filename.startswith(gitcwd)])
2016-09-23 20:11:06 +00:00
def check_for_conflicts(filesets):
""" Scan list of porcelain files for merge conflic state. """
MERGE_CONFLICT_PAIRS = set(['DD', 'DU', 'AU', 'AA', 'UD', 'UA', 'UU'])
status_pairs = set(['' + f[0] + f[1] for f in filesets])
if len(status_pairs & MERGE_CONFLICT_PAIRS):
sys.exit(
_('Current repository contains merge conflicts. Linters will not be run.'))
return filesets
2016-09-23 20:11:06 +00:00
def remove_submodules(files):
""" Remove all submodules from the list of files git-lint cares about. """
2016-09-23 20:11:06 +00:00
fixer_re = re.compile('^(\\.\\.\\/)+')
submodules = split_git_response(['submodule', 'status'])
submodule_names = [fixer_re.sub('', submodule.split(' ')[2])
for submodule in submodules]
return [file for file in files if (file not in submodule_names)]
2016-09-23 20:11:06 +00:00
def get_porcelain_status():
""" Return the status of all files in the system. """
cmd = ['status', '-z', '--porcelain',
'--untracked-files=all', '--ignore-submodules=all']
stream = [entry for entry in get_git_response(cmd).split(u'\x00')
if len(entry) > 0]
2016-09-23 20:11:06 +00:00
2016-09-30 01:36:27 +00:00
# Yeah, baby, recursion, the way this is meant to be handled.
# If you have more than 999 files that need linting, you have
# a bigger problem...
def parse_stream(acc, stream):
"""Parse the list of files. T
2016-09-23 20:11:06 +00:00
The list is null-terminated, but is not columnar. If
there's an 'R' in the index state, it means the file was
renamed and the old name is added as a column, so it's a
special case as we accumulate the list of files.
"""
2016-09-23 20:11:06 +00:00
if len(stream) == 0:
return acc
entry = stream.pop(0)
(index, workspace, filename) = (entry[0], entry[1], entry[3:])
if index == 'R':
stream.pop(0)
return parse_stream(acc + [(index, workspace, filename)], stream)
2016-09-23 20:11:06 +00:00
return check_for_conflicts(parse_stream([], stream))
def staging_list():
""" Return the list of files added or modified to the stage """
2016-09-23 20:11:06 +00:00
return [filename for (index, workspace, filename) in get_porcelain_status()
if index in ['A', 'M']]
def working_list():
2016-09-26 21:58:43 +00:00
""" Return the list of files that have been modified in the workspace.
Includes the '?' to include files that git is not currently tracking.
"""
return [filename for (index, workspace, filename) in get_porcelain_status()
if workspace in ['A', 'M', '?']]
def all_list():
""" Return all the files git is currently tracking for this repository. """
cmd = ['ls-tree', '--name-only', '--full-tree', '-r', '-z', git_head]
return [file for file in get_git_response(cmd).split(u'\x00')
if len(file) > 0]
2016-09-26 18:44:31 +00:00
if len(extras):
cwd = os.path.abspath(os.getcwd())
extras_fullpathed = set([os.path.abspath(os.path.join(cwd, f)) for f in extras])
2016-09-26 18:44:31 +00:00
not_found = set([f for f in extras_fullpathed if not os.path.isfile(f)])
return ([os.path.relpath(f, cwd) for f in (extras_fullpathed - not_found)], not_found)
2016-09-26 21:58:43 +00:00
2016-09-23 22:27:15 +00:00
working_directory_trans = cwd_file_filter
2016-09-28 21:26:06 +00:00
if 'base' in options or 'every' in options:
2016-09-23 22:27:15 +00:00
working_directory_trans = base_file_filter
2016-09-23 20:11:06 +00:00
2016-09-23 22:27:15 +00:00
file_list_generator = working_list
2016-09-28 21:26:06 +00:00
if 'all' in options:
file_list_generator = all_list
2016-09-28 21:26:06 +00:00
if 'staging' in options:
2016-09-23 22:27:15 +00:00
file_list_generator = staging_list
2016-09-23 20:11:06 +00:00
return (working_directory_trans(remove_submodules(file_list_generator())), [])
2016-09-23 20:11:06 +00:00
2016-09-26 21:58:43 +00:00
# ___ _ _
# / __| |_ __ _ __ _(_)_ _ __ _ __ __ ___ _ __ _ _ __ _ __ ___ _ _
# \__ \ _/ _` / _` | | ' \/ _` | \ V V / '_/ _` | '_ \ '_ \/ -_) '_|
2016-09-26 21:58:43 +00:00
# |___/\__\__,_\__, |_|_||_\__, | \_/\_/|_| \__,_| .__/ .__/\___|_|
# |___/ |___/ |_| |_|
class StagingRunner:
def __init__(self, filenames):
self.filenames = filenames
2016-09-28 21:26:06 +00:00
def __enter__(self):
2016-09-23 20:11:06 +00:00
def time_gather(f):
2016-09-23 22:27:15 +00:00
stats = os.stat(f)
2016-09-26 22:26:04 +00:00
return (f, (stats.st_atime, stats.st_mtime))
self.times = [time_gather(filename) for filename in self.filenames]
run_git_command(['stash', '--keep-index'])
2016-09-23 20:11:06 +00:00
def __exit__(self, type, value, traceback):
run_git_command(['reset', '--hard'])
run_git_command(['stash', 'pop', '--quiet', '--index'])
for (filename, timepair) in self.times:
2016-09-23 22:27:15 +00:00
os.utime(filename, timepair)
class WorkspaceRunner(object):
def __init__(self, filenames):
pass
def __enter__(self):
pass
def __exit__(self, type, value, traceback):
pass
2016-09-26 21:58:43 +00:00
# ___ _ _ _
# | _ \_ _ _ _ | (_)_ _| |_ _ __ __ _ ______
# | / || | ' \ | | | ' \ _| | '_ \/ _` (_-<_-<
# |_|_\\_,_|_||_| |_|_|_||_\__| | .__/\__,_/__/__/
# |_|
2016-09-23 20:11:06 +00:00
2016-09-28 21:57:02 +00:00
class Linters:
def __init__(self, linters, filenames):
2016-09-28 21:26:06 +00:00
self.linters = linters
self.filenames = filenames
2016-09-23 20:11:06 +00:00
2016-09-28 21:26:06 +00:00
@staticmethod
def encode_shell_messages(prefix, messages):
return ['{}{}'.format(prefix, line)
for line in messages.splitlines()]
2016-09-28 21:26:06 +00:00
@staticmethod
def run_external_linter(filename, linter, linter_name):
"""Run one linter against one file.
2016-09-23 20:11:06 +00:00
2016-09-28 21:26:06 +00:00
If the result matches the error condition specified in the configuration file,
return the error code and messages, otherwise return nothing.
2016-09-28 21:26:06 +00:00
"""
2016-09-26 22:52:46 +00:00
2016-09-28 21:26:06 +00:00
cmd = linter['command'] + ' "' + filename + '"'
(out, err, returncode) = get_shell_response(cmd)
failed = ((out and (linter.get('condition', 'error') == 'output')) or err or (not (returncode == 0)))
2016-09-26 22:51:31 +00:00
trimmed_filename = filename.replace(git_base + '/', '', 1)
2016-09-28 21:26:06 +00:00
if not failed:
return (trimmed_filename, linter_name, 0, [])
2016-09-28 22:22:34 +00:00
prefix = (((linter.get('print', 'false').strip().lower() != 'true') and ' ') or
' {}: '.format(trimmed_filename))
output = (Linters.encode_shell_messages(prefix, out) +
((err and Linters.encode_shell_messages(prefix, err)) or []))
2016-09-28 21:26:06 +00:00
return (trimmed_filename, linter_name, (returncode or 1), output)
2016-09-28 21:26:06 +00:00
@staticmethod
def run_one_linter(linter, filenames):
""" Runs one linter against a set of files
2016-09-28 21:26:06 +00:00
Creates a match filter for the linter, extract the files to be
linted, and runs the linter against each file, returning the
result as a list of successes and failures. Failures have a
return code and the output of the lint process.
"""
2016-09-28 21:57:02 +00:00
match_filter = MatchFilter([linter])
2016-09-28 21:26:06 +00:00
files = set([filename for filename in filenames if match_filter(filename)])
2016-09-28 21:57:02 +00:00
return [Linters.run_external_linter(filename, linter.linter, linter.name) for filename in files]
2016-09-26 21:58:43 +00:00
2016-09-28 21:57:02 +00:00
def __call__(self):
2016-09-28 21:26:06 +00:00
""" Returns a function to run a set of linters against a set of filenames
2016-09-28 21:26:06 +00:00
This returns a function because it's going to be wrapped in a
runner to better handle stashing and restoring a staged commit.
"""
return reduce(operator.add,
2016-09-28 21:57:02 +00:00
[Linters.run_one_linter(linter, self.filenames) for linter in self.linters], [])
2016-09-28 21:26:06 +00:00
def dryrun(self):
2016-09-28 21:26:06 +00:00
def dryrunonefile(filename, linter):
trimmed_filename = filename.replace(git_base + '/', '', 1)
return (trimmed_filename, linter.name, 0, [' {}'.format(trimmed_filename)])
2016-09-28 21:26:06 +00:00
def dryrunonce(linter, filenames):
match_filter = MatchFilter([linter])
files_to_check = [filename for filename in filenames if match_filter(filename)]
return [dryrunonefile(filename, linter) for filename in files_to_check]
2016-09-28 21:26:06 +00:00
return reduce(operator.add, [dryrunonce(linter, self.filenames) for linter in self.linters], [])
2016-09-30 01:36:27 +00:00
def run_linters(options, config, extras=[]):
2016-09-23 20:11:06 +00:00
def build_config_subset(keys):
""" Returns a subset of the configuration, with only those linters mentioned in keys """
return [item for item in config if item.name in keys]
""" Runs the requested linters """
2016-09-28 21:26:06 +00:00
all_filenames, unfindable_filenames = get_filelist(options, extras)
2016-09-26 21:58:43 +00:00
2016-09-28 21:26:06 +00:00
is_lintable = MatchFilter(config)
2016-09-26 22:51:31 +00:00
2016-09-26 21:58:43 +00:00
lintable_filenames = set([filename for filename in all_filenames
if is_lintable(filename)])
2016-09-26 22:51:31 +00:00
unlintable_filenames = set(all_filenames) - lintable_filenames
2016-09-26 21:58:43 +00:00
working_linter_names, broken_linter_names = get_linter_status(config)
2016-09-26 22:51:31 +00:00
2016-09-28 21:26:06 +00:00
cant_lint_filter = MatchFilter(build_config_subset(
2016-09-26 21:58:43 +00:00
broken_linter_names))
2016-09-26 22:51:31 +00:00
2016-09-26 21:58:43 +00:00
cant_lint_filenames = [filename for filename in lintable_filenames
if cant_lint_filter(filename)]
2016-09-26 18:46:47 +00:00
runner = WorkspaceRunner
if 'staging' in options:
runner = StagingRunner
2016-09-28 21:57:02 +00:00
linters = Linters(build_config_subset(working_linter_names),
sorted(lintable_filenames))
if 'dryrun' in options:
dryrun_results = linters.dryrun()
return (dryrun_results, unlintable_filenames, cant_lint_filenames,
broken_linter_names, unfindable_filenames)
2016-09-28 21:57:02 +00:00
with runner(lintable_filenames):
results = linters()
2016-09-26 18:44:31 +00:00
2016-09-28 21:26:06 +00:00
return (results, unlintable_filenames, cant_lint_filenames,
broken_linter_names, unfindable_filenames)