Archive for November, 2008

Adding GIT support to Meld

Wednesday, November 19th, 2008

Meld is a great diffing/merging tool with version control support. GIT support doesn’t come out of the box though. To enable GIT support you need to copy this file into your /usr/lib/meld/vc directory. Then you can open the directory where your GIT repository is checked out (using New -> Version Control Browser of course).

I would like to include the contents of the file here as well (git.py):

# -*- coding: utf-8 -*- 

# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:

### Copyright (C) 2002-2005 Stephen Kennedy <stevek@gnome.org>
### Copyright (C) 2005 Aaron Bentley <aaron.bentley@utoronto.ca>
### Copyright (C) 2007 José Fonseca <j_r_fonseca@yahoo.co.uk>

### Redistribution and use in source and binary forms, with or without
### modification, are permitted provided that the following conditions
### are met:
### 
### 1. Redistributions of source code must retain the above copyright
###    notice, this list of conditions and the following disclaimer.
### 2. Redistributions in binary form must reproduce the above copyright
###    notice, this list of conditions and the following disclaimer in the
###    documentation and/or other materials provided with the distribution.

### THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
### IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
### OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
### IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
### INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
### NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
### DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
### THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
### (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
### THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import errno
import _vc

class Vc(_vc.Vc):

    CMD = "git"
    NAME = "Git"
    PATCH_STRIP_NUM = 1
    PATCH_INDEX_RE = "^diff --git a/(.*) b/.*$"

    def __init__(self, location):
        self._tree_cache = None
        while location != "/":
            if os.path.isdir( "%s/.git" % location):
                self.root = location
                return
            location = os.path.dirname(location)
        raise ValueError()

    def commit_command(self, message):
        return [self.CMD,"commit","-m",message]
    def diff_command(self):
        return [self.CMD,"diff","HEAD"]
    def update_command(self):
        return [self.CMD,"pull"]
    def add_command(self, binary=0):
        return [self.CMD,"add"]
    def remove_command(self, force=0):
        return [self.CMD,"rm"]
    def revert_command(self):
        return [self.CMD,"checkout"]
    def get_working_directory(self, workdir):
        if workdir.startswith("/"):
            return self.root
        else:
            return ''

    def cache_inventory(self, topdir):
        self._tree_cache = self.lookup_tree()

    def uncache_inventory(self):
        self._tree_cache = None

    def lookup_tree(self):
        while 1:
            try:
                proc = os.popen("cd %s && git status --untracked-files" % self.root)
                entries = proc.read().split("\\n")[:-1]
                break
            except OSError, e:
                if e.errno != errno.EAGAIN:
                    raise
        statemap = {
            "unknown": _vc.STATE_NONE,
            "new file": _vc.STATE_NEW,
            "deleted": _vc.STATE_REMOVED,
            "modified": _vc.STATE_MODIFIED,
            "typechange": _vc.STATE_NORMAL,
            "unmerged": _vc.STATE_CONFLICT }
        tree_state = {}
        for entry in entries:
            if not entry.startswith("#\t"):
                continue
            try:
                statekey, name = entry[2:].split(":", 2)
            except ValueError:
                # untracked
                name = entry[2:]
                path = os.path.join(self.root, name.strip())
                tree_state[path] = _vc.STATE_NONE
            else:
                statekey = statekey.strip()
                name = name.strip()
                try:
                    src, dst = name.split(" -> ", 2)
                except ValueError:
                    path = os.path.join(self.root, name.strip())
                    state = statemap.get(statekey, _vc.STATE_NONE)
                    tree_state[path] = state
                else:
                    # copied, renamed
                    if statekey == "renamed":
                        tree_state[os.path.join(self.root, src)] = _vc.STATE_REMOVED
                    tree_state[os.path.join(self.root, dst)] = _vc.STATE_NEW
        return tree_state

    def get_tree(self):
        if self._tree_cache is None:
            return self.lookup_tree()
        else:
            return self._tree_cache

    def lookup_files(self, dirs, files):
        "files is array of (name, path). assume all files in same dir"

        if len(files):
            directory = os.path.dirname(files[0][1])
        elif len(dirs):
            directory = os.path.dirname(dirs[0][1])
        else:
            return [],[]

        tree = self.get_tree()

        retfiles = []
        retdirs = []
        for name,path in files:
            state = tree.get(path, _vc.STATE_IGNORED)
            retfiles.append( _vc.File(path, name, state) )
        for name,path in dirs:
            # git does not operate on dirs, just files
            retdirs.append( _vc.Dir(path, name, _vc.STATE_NORMAL))
        for path, state in tree.iteritems():
            # removed files are not in the filesystem, so must be added here
            if state is _vc.STATE_REMOVED:
                if os.path.dirname(path) == directory:
                    retfiles.append( _vc.File(path, name, state) )
        return retdirs, retfiles

    def listdir(self, start):
        # just like _vc.Vc.listdir, but ignores just .git
        if start=="": start="."
        if start[-1] != "/": start+="/"
        cfiles = []
        cdirs = []
        try:
            entries = os.listdir(start)
            entries.sort()
        except OSError:
            entries = []
        for f in [f for f in entries if f!=".git"]:
            fname = start + f
            lname = fname
            if os.path.isdir(fname):
                cdirs.append( (f, lname) )
            else:
                cfiles.append( (f, lname) )
        dirs, files = self.lookup_files(cdirs, cfiles)
        return dirs+files

Enjoy!

Bookmark and Share

Extending Kate With Pâté

Wednesday, November 12th, 2008

Pâté is a plugin for Kate (of the KDE desktop) that exposes the editor’s functionality to Python. In short: with Pâté, you can write Kate plugins in Python.

I use Kate for (almost) all my text editing. I think it suits my needs perfectly. It is both as simple as I would be comfortable with and has as many features (such as multi document interface, regex search and replace, etc.) as I need to be productive. I am not an IDE person. Nothing against IDE’s, I have just never been comfortable with them. On the other side I have never taken the time to learn the classic (read antique) editors such as Emacs and Vim. I am sure learning them would be worth my time. But I doubt I will ever take the time for that. At present, I just fire up my Kate and it works pretty well.

Meanwhile, I keep hearing about these neat hacks with Emacs that when you do C-t, C-M-w and then C-k your active buffer is translated to Chinese and then automatically sent to your grandmother’s cell phone as SMS. Wow! And you can write your own macros (in Lisp, elisp) to extend the editor’s capabilities. There is virtually no limit to what you can do and it is not rare that these extensions exceed the borders of an editor. Of course you have to use (and learn) Emacs to take advantage.

This was true, before I discovered Pâté. It was always possible to write plugins for Kate, but Kate was not scriptable. Now using Pâté, you can extend Kate with ease (at least much much easier than writing C). The only thing that bugs me is I couldn’t figure out a way to reload my plugins without restarting Kate.

Creating Simple Pâté Plugins

The first plugin we write will turn the selection into a Django password hash. I use this when I want to quickly create initial_data fixtures for User objects.

Since we do not want to instantiate a complete Django environment we won’t be able to import anything from django.contrib.auth.models. Instead let us copy a dumbed down version of get_hexdigest into our own module.

import random


def get_hexdigest(algorithm, salt, raw_password):
    """Return the SHA-1 hex digest of salt + raw_password.

    The algorithm argument is accepted but not consulted; SHA-1 is always
    used.  hashlib is preferred, with the older sha module as the fallback
    when hashlib cannot be imported.
    """
    try:
        from hashlib import sha1 as make_sha1
    except ImportError:
        from sha import new as make_sha1
    return make_sha1(salt + raw_password).hexdigest()

It should be clear enough, it returns a hash of the given password using the given salt[1]. Now we simply add kate to our imports (remember kate and not pate):

import kate

And add our own callback code:

@kate.onAction('Django Password', 'Shift+Alt+P')
def setPassword():
    """Replace the current selection with a Django-style password hash.

    The selected text is taken as the raw password, removed from the view,
    and 'sha1$<salt>$<hash>' is inserted, where the salt is the first five
    hex characters of a digest built from two random numbers.
    """
    view = kate.view()
    raw_password = view.selection.text
    view.selection.removeSelectedText()

    algo = 'sha1'
    first_seed = str(random.random())
    second_seed = str(random.random())
    salt = get_hexdigest(algo, first_seed, second_seed)[:5]
    hsh = get_hexdigest(algo, salt, raw_password)

    view.insertText('%s$%s$%s' % (algo, salt, hsh))

The first line makes our function kate-aware. 'Django Password' will be the label for our menu item (it will be listed under Tools) and Shift-Alt-P will be the keyboard shortcut. The rest of the code should be self-explanatory.[2]

Now we copy our module into ~/.kde/share/apps/kate/pyplugins/ and restart Kate. It should show up in the menu and work now.

Second example is a JSON prettifier. I use JSON format for my fixtures, but valid JSON is not very readable. So I have this small plugin to convert a document between JSON and Python literals:

import sys, pprint
import kate
from django.utils import simplejson


# Marker line placed in front of the pretty-printed form.  It must end with
# a real newline so that it is a complete Python comment line and the data
# that follows it remains parseable.
HEADER = '# Pretty Printed\n'

@kate.onAction('Django Pretty Json', 'Shift+Alt+J')
def togglePrettyJsonFormat():
    """Switch the document between JSON and pretty-printed Python literals.

    A document that starts with HEADER is evaluated as Python and dumped
    back to JSON; any other document is parsed as JSON and replaced by
    HEADER followed by its pretty-printed form.

    NOTE(review): eval() runs whatever the document contains, so this
    should only be used on documents you trust.
    """
    doc = kate.document()
    text = doc.text
    if not text.startswith(HEADER):
        printer = pprint.PrettyPrinter(indent=2)
        converted = HEADER + printer.pformat(simplejson.loads(text))
    else:
        converted = simplejson.dumps(eval(text))
    doc.text = converted

I need HEADER to distinguish between the two states. It can actually be anything, but it is a good idea to make it a Python comment so that the pretty-printed document remains valid Python.

Pâté Is Fun

I have enjoyed experimenting with Pâté. I hope it gets more attention and therefore ends up a much better plugin. If you ask me it should already ship with Kate. Kate is a nice editor, and empowering the users would only make it nicer and more popular.

If you have any ideas for pâté plugins, especially stuff that is useful in Django context, please add it to the comments. I would love to play a little more with pâté.


1: If it is not clear, take a look at django.contrib.auth.models.

2: To learn the API, you can fire up the Interactive Console under View and type help(kate).

Bookmark and Share

Dictionary Key Validation, Lists, Sets and Iterators

Wednesday, November 5th, 2008

Recently I needed to check whether a Python dictionary has exactly a specific set of keys. I made some tests to see which method works best. For the sake of simplicity, I’ve compared the keys of two dictionaries instead of a dictionary’s keys against a sequence of keys.

All of the following test results are averages of 10 repetitions of the given code, in the form of:

sum(timeit.Timer(<test_code>).repeat(10))/10.0

First, I did some simple tests with small sizes:

timeit.Timer('l1.sort();l2.sort();l1==l2', 'l1,l2=[1,2,3], [3,2,1]')
>>> 1.95

timeit.Timer('set(l1)==set(l2)', 'l1,l2=[1,2,3], [3,2,1]')
>>> 2.85

The second one, with sets, seems more intuitive to me, but it’s slower than the one with lists. Obviously you need to sort the lists before comparing them, because a dictionary’s keys are not ordered and therefore its keys() returns a list whose item order is unpredictable. A set, on the other hand, is unordered as well, but two sets holding the same keys always compare equal.

Now, what happens if we work on a larger number of items:

timeit.Timer('l1.sort();l2.sort();l1==l2', 'l1,l2=range(100), \
    [100-i for i in range(100)]')
>>> 15.49

timeit.Timer('set(l1)==set(l2)', 'l1,l2=range(100), \
    [100-i for i in range(100)]')
>>> 25.13

More or less the same results. Actually both of these tests are biased towards lists. We initialize our test data as lists. So while the first tests only consist of in-place sorting and comparison, the second ones involve creation of new set objects and comparison.

Let us see what happens when we start with actual dictionaries and do the same comparisons:

timeit.Timer('l1,l2=d1.keys(),d2.keys();l1.sort();l2.sort();l1==l2', \
    'd1,d2=dict([(str(i), i) for i in range(3)]),dict([(str(3-i), 3-i) \
    for i in range(3)])')
>>> 2.49

timeit.Timer('set(d1.keys())==set(d2.keys())', 'd1,d2=dict([(str(i), i) \
    for i in range(3)]),dict([(str(3-i), 3-i) for i in range(3)])')
>>> 2.83

And for comparison the first test without sorting (of course it evaluates to False):

timeit.Timer('d1.keys()==d2.keys()', 'd1,d2=dict([(str(i), i) \
    for i in range(3)]),dict([(str(3-i), 3-i) for i in range(3)])')
>>> 1.11

We can clearly see initializing sets takes slightly longer than in-place sorting. But what happens if we work with more keys:

timeit.Timer('l1,l2=d1.keys(),d2.keys();l1.sort();l2.sort();l1==l2', \
    'd1,d2=dict([(str(i), i) for i in range(10)]), \
    dict([(str(10-i), 10-i) for i in range(10)])')
>>> 5.21

timeit.Timer('set(d1.keys())==set(d2.keys())', \
    'd1,d2=dict([(str(i), i) for i in range(10)]), \
    dict([(str(10-i), 10-i) for i in range(10)])')
>>> 5.59

When we increased the number of items to ten, the performance gap decreased. This is probably because set comparisons are based on hashes, as I mentioned before, while list comparisons are done item by item, since lists are mutable. Let’s go even higher and see if the difference becomes clearer:

timeit.Timer('l1,l2=d1.keys(),d2.keys();l1.sort();l2.sort();l1==l2', \
    'd1,d2=dict([(str(i), i) for i in range(25)]), \
    dict([(str(25-i), 25-i) for i in range(25)])')
>>> 16.21

timeit.Timer('set(d1.keys())==set(d2.keys())', \
    'd1,d2=dict([(str(i), i) for i in range(25)]), \
    dict([(str(25-i), 25-i) for i in range(25)])')
>>> 9.79

This time sets are about 65% faster (9.79 versus 16.21). It seems sets perform better than sorted lists overall. They are not much slower at lower item counts and significantly faster at higher item counts. It dawned on me that when using sets I can take advantage of iterators instead of creating list objects. Here are the results for all three sizes:

timeit.Timer('set(d1.iterkeys())==set(d2.iterkeys())', \
    'd1,d2=dict([(str(i), i) for i in range(3)]), \
    dict([(str(3-i), 3-i) for i in range(3)])')
>>> 2.07
timeit.Timer('set(d1.iterkeys())==set(d2.iterkeys())', \
    'd1,d2=dict([(str(i), i) for i in range(10)]), \
    dict([(str(10-i), 10-i) for i in range(10)])')
>>> 3.79
timeit.Timer('set(d1.iterkeys())==set(d2.iterkeys())', \
    'd1,d2=dict([(str(i), i) for i in range(25)]), \
    dict([(str(25-i), 25-i) for i in range(25)])')
>>> 8.92

It performs best for all sizes. In addition it is quite intuitive and readable. I think I have found the foundation of my comparison function, it should look similar to this:

if set(d.iterkeys())==set(sequence_of_keys):

And here is a chart combining all results:

Performance Comparison

PS: Yes, the prompts (the >>> markers) are in the wrong place. It is intentional. I’m not trying to reproduce the Python prompt in Markdown; it just looks better to me this way.

Bookmark and Share