#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim: sts=4 sw=4 et

import sys
from streetmangler import Locale, Database
import lxml.etree as ET

if len(sys.argv) < 4:
    print "Usage: %s check|modify db.txt file.osm [subs.txt]" % sys.argv[0]
    sys.exit(1)

action = sys.argv[1]

db = Database(Locale('ru_RU'))
db.Load(sys.argv[2])

# Input OSM XML: '-' selects standard input, anything else is opened as a path.
if sys.argv[3] == '-':
    fp = sys.stdin
else:
    fp = open(sys.argv[3])

# Optional substitutions file (fourth argument): UTF-8 text with one
# "original: replacement" pair per line, split on the first ':'.  Both sides
# are stripped of surrounding whitespace and blank lines are ignored.
subs = {}
if sys.argv[4:5]:
    with open(sys.argv[4]) as subs_file:
        for lineno, line in enumerate(subs_file, 1):
            line = line.strip().decode('utf-8')
            if not line:
                continue
            parts = line.split(':', 1)
            if len(parts) != 2:
                # A line without a separator cannot form a pair; report it
                # and carry on instead of dying inside dict.update().
                sys.stderr.write('%s:%d: ignoring line without ":" separator\n'
                                 % (sys.argv[4], lineno))
                continue
            key, value = parts
            subs[key.strip()] = value.strip()

xml = ET.parse(fp)

xpath_street = '//tag[../tag[@k="highway"] and @k="name"]'
xpath_addr = '//tag[@k="addr:street"]'

cache = {}
for t in xml.xpath(xpath_street) + xml.xpath(xpath_addr):
    street = orig = t.attrib['v']
    if not street:
        continue
    street = subs.get(street, street)
    if street in cache:
        street = cache[street]
    else:
        canonical = db.CheckCanonicalForm(street)
        if not canonical:
            sys.stderr.write('No canonical name: %s\n' % street)
            continue
        street = canonical[0]
    cache[orig] = street
    if street == orig:
        continue
    parent = t.getparent()
    if action == 'check':
        print parent.tag, parent.attrib['id'], orig, '->', street
    t.attrib['v'] = street
    t.getparent().attrib['action'] = 'modify'

if action == 'modify':
    print ET.tostring(xml, encoding='utf-8')

