#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim: sts=4 sw=4 et

import io
import sys

import lxml.etree as ET
from streetmangler import Locale, Database

# XPath selecting the "name" tag of every element that also carries a
# "highway" tag.
NAME_XPATH = '//tag[../tag[@k="highway"] and @k="name"]'


def load_substitutions(path):
    """Read a ``original: replacement`` substitutions file.

    The file is decoded as UTF-8.  Blank lines are ignored; every other
    line is split on its first ``:`` and both halves are stripped of
    surrounding whitespace.

    Args:
        path: Path of the substitutions file.

    Returns:
        A dict mapping each original name to its replacement.

    Raises:
        ValueError: If a non-blank line contains no ``:`` separator
            (or, as UnicodeDecodeError, if the file is not valid UTF-8).
    """
    subs = {}
    # io.open decodes at the I/O boundary and behaves the same on
    # Python 2 and 3; the with-block guarantees the handle is closed.
    with io.open(path, encoding='utf-8') as fp:
        for lineno, line in enumerate(fp, 1):
            line = line.strip()
            if not line:
                continue
            key, sep, value = line.partition(':')
            if not sep:
                # Only %r/%d are used so the message stays plain ASCII
                # even when the path or the line is not.
                raise ValueError(
                    'malformed substitution at %r line %d: %r'
                    % (path, lineno, line))
            subs[key.strip()] = value.strip()
    return subs


def parse_osm(path):
    """Parse the OSM XML document at *path*; ``-`` means standard input.

    Returns:
        The tree object produced by ``ET.parse``.
    """
    if path == '-':
        return ET.parse(sys.stdin)
    # Binary mode: the XML parser does its own decoding, and the file is
    # closed as soon as parsing is finished.
    with open(path, 'rb') as fp:
        return ET.parse(fp)


def canonical_street_names(xml, db, subs):
    """Collect the distinct street names found in *xml*.

    Each name selected by NAME_XPATH is first replaced through *subs*
    (when it has an entry there) and then by the first result of
    ``db.CheckCanonicalForm`` when that call returns a non-empty result;
    otherwise the (possibly substituted) name is kept as is.

    Returns:
        A set of the resulting names.
    """
    names = set()
    for tag in xml.xpath(NAME_XPATH):
        name = tag.attrib['v']
        name = subs.get(name, name)
        forms = db.CheckCanonicalForm(name)
        if forms:
            name = forms[0]
        names.add(name)
    return names


def main(argv):
    """Print the sorted canonical street names of an OSM file.

    Args:
        argv: Command line as in ``sys.argv``: program name, street
            database file, OSM file (``-`` for stdin) and an optional
            substitutions file.

    Returns:
        The process exit status: 1 on a usage error, 0 otherwise.
    """
    if len(argv) < 3:
        print("Usage: %s d.txt file.osm [subs.txt]" % argv[0])
        return 1

    db = Database(Locale('ru_RU'))
    db.Load(argv[1])

    subs = load_substitutions(argv[3]) if len(argv) > 3 else {}
    xml = parse_osm(argv[2])

    for name in sorted(canonical_street_names(xml, db, subs)):
        # Single-argument print() behaves like the print statement on
        # Python 2, which this script targets.
        print(name.encode('utf-8'))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

