#===istalismanplugin=== # -*- coding: utf-8 -*- import re import urllib2 class InvalidFormat: def __init__(self): pass class RussianPostParser: def __init__(self): self.header = [] self.result = [] self.headers_count = 0 self.BLOCK_STARTED = 0 self.BLOCK_END = 1 def process_header(self, elements): global header, headers_count if self.headers_count == 0: self.header = elements self.headers_count+=1 elif self.headers_count == 1: self.header = self.header[0:2] + elements[0:2] + self.header[3:7] + elements[2:4] + self.header[8:] self.headers_count+=1 def process_data(self, elements): if len(self.header) != len(elements): raise InvalidFormat() self.result.append(elements) def process_block(self, block_class, elements): if block_class.count('HEADER'): self.process_header(elements) else: self.process_data(elements) def Parse(self, data): block_class = '' state = None elems = [] for x in [x.strip().replace(' ', '').decode('cp1251').encode('utf8') for x in data[data.index(''):data.index('
')].replace('\r','').split('\n')]: if not x: continue if x[0:4] == '(.*)', x).group(1)) except: raise InvalidFormat() elif x[0:4] == '