ip2Region.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. # -*- coding:utf-8 -*-
  2. """
  3. " ip2region python searcher client module
  4. "
  5. " Author: koma<komazhang@foxmail.com>
  6. " Date : 2015-11-06
  7. """
  8. import struct, io, socket, sys
  9. class Ip2Region(object):
  10. def __init__(self, dbfile):
  11. self.__INDEX_BLOCK_LENGTH = 12
  12. self.__TOTAL_HEADER_LENGTH = 8192
  13. self.__f = None
  14. self.__headerSip = []
  15. self.__headerPtr = []
  16. self.__headerLen = 0
  17. self.__indexSPtr = 0
  18. self.__indexLPtr = 0
  19. self.__indexCount = 0
  20. self.__dbBinStr = ''
  21. self.initDatabase(dbfile)
  22. def memorySearch(self, ip):
  23. """
  24. " memory search method
  25. " param: ip
  26. """
  27. if not ip.isdigit(): ip = self.ip2long(ip)
  28. if self.__dbBinStr == '':
  29. self.__dbBinStr = self.__f.read() # read all the contents in file
  30. self.__indexSPtr = self.getLong(self.__dbBinStr, 0)
  31. self.__indexLPtr = self.getLong(self.__dbBinStr, 4)
  32. self.__indexCount = int((self.__indexLPtr - self.__indexSPtr) / self.__INDEX_BLOCK_LENGTH) + 1
  33. l, h, dataPtr = (0, self.__indexCount, 0)
  34. while l <= h:
  35. m = int((l + h) >> 1)
  36. p = self.__indexSPtr + m * self.__INDEX_BLOCK_LENGTH
  37. sip = self.getLong(self.__dbBinStr, p)
  38. if ip < sip:
  39. h = m - 1
  40. else:
  41. eip = self.getLong(self.__dbBinStr, p + 4)
  42. if ip > eip:
  43. l = m + 1
  44. else:
  45. dataPtr = self.getLong(self.__dbBinStr, p + 8)
  46. break
  47. if dataPtr == 0: raise Exception("Data pointer not found")
  48. return self.returnData(dataPtr)
  49. def binarySearch(self, ip):
  50. """
  51. " binary search method
  52. " param: ip
  53. """
  54. if not ip.isdigit(): ip = self.ip2long(ip)
  55. if self.__indexCount == 0:
  56. self.__f.seek(0)
  57. superBlock = self.__f.read(8)
  58. self.__indexSPtr = self.getLong(superBlock, 0)
  59. self.__indexLPtr = self.getLong(superBlock, 4)
  60. self.__indexCount = int((self.__indexLPtr - self.__indexSPtr) / self.__INDEX_BLOCK_LENGTH) + 1
  61. l, h, dataPtr = (0, self.__indexCount, 0)
  62. while l <= h:
  63. m = int((l + h) >> 1)
  64. p = m * self.__INDEX_BLOCK_LENGTH
  65. self.__f.seek(self.__indexSPtr + p)
  66. buffer = self.__f.read(self.__INDEX_BLOCK_LENGTH)
  67. sip = self.getLong(buffer, 0)
  68. if ip < sip:
  69. h = m - 1
  70. else:
  71. eip = self.getLong(buffer, 4)
  72. if ip > eip:
  73. l = m + 1
  74. else:
  75. dataPtr = self.getLong(buffer, 8)
  76. break
  77. if dataPtr == 0: raise Exception("Data pointer not found")
  78. return self.returnData(dataPtr)
  79. def btreeSearch(self, ip):
  80. """
  81. " b-tree search method
  82. " param: ip
  83. """
  84. if not ip.isdigit(): ip = self.ip2long(ip)
  85. if len(self.__headerSip) < 1:
  86. headerLen = 0
  87. # pass the super block
  88. self.__f.seek(8)
  89. # read the header block
  90. b = self.__f.read(self.__TOTAL_HEADER_LENGTH)
  91. # parse the header block
  92. for i in range(0, len(b), 8):
  93. sip = self.getLong(b, i)
  94. ptr = self.getLong(b, i + 4)
  95. if ptr == 0:
  96. break
  97. self.__headerSip.append(sip)
  98. self.__headerPtr.append(ptr)
  99. headerLen += 1
  100. self.__headerLen = headerLen
  101. l, h, sptr, eptr = (0, self.__headerLen, 0, 0)
  102. while l <= h:
  103. m = int((l + h) >> 1)
  104. if ip == self.__headerSip[m]:
  105. if m > 0:
  106. sptr = self.__headerPtr[m - 1]
  107. eptr = self.__headerPtr[m]
  108. else:
  109. sptr = self.__headerPtr[m]
  110. eptr = self.__headerPtr[m + 1]
  111. break
  112. if ip < self.__headerSip[m]:
  113. if m == 0:
  114. sptr = self.__headerPtr[m]
  115. eptr = self.__headerPtr[m + 1]
  116. break
  117. elif ip > self.__headerSip[m - 1]:
  118. sptr = self.__headerPtr[m - 1]
  119. eptr = self.__headerPtr[m]
  120. break
  121. h = m - 1
  122. else:
  123. if m == self.__headerLen - 1:
  124. sptr = self.__headerPtr[m - 1]
  125. eptr = self.__headerPtr[m]
  126. break
  127. elif ip <= self.__headerSip[m + 1]:
  128. sptr = self.__headerPtr[m]
  129. eptr = self.__headerPtr[m + 1]
  130. break
  131. l = m + 1
  132. if sptr == 0: raise Exception("Index pointer not found")
  133. indexLen = eptr - sptr
  134. self.__f.seek(sptr)
  135. index = self.__f.read(indexLen + self.__INDEX_BLOCK_LENGTH)
  136. l, h, dataPrt = (0, int(indexLen / self.__INDEX_BLOCK_LENGTH), 0)
  137. while l <= h:
  138. m = int((l + h) >> 1)
  139. offset = int(m * self.__INDEX_BLOCK_LENGTH)
  140. sip = self.getLong(index, offset)
  141. if ip < sip:
  142. h = m - 1
  143. else:
  144. eip = self.getLong(index, offset + 4)
  145. if ip > eip:
  146. l = m + 1
  147. else:
  148. dataPrt = self.getLong(index, offset + 8)
  149. break
  150. if dataPrt == 0: raise Exception("Data pointer not found")
  151. return self.returnData(dataPrt)
  152. def initDatabase(self, dbfile):
  153. """
  154. " initialize the database for search
  155. " param: dbFile
  156. """
  157. try:
  158. self.__f = io.open(dbfile, "rb")
  159. except IOError as e:
  160. print("[Error]: %s" % e)
  161. sys.exit()
  162. def returnData(self, dataPtr):
  163. """
  164. " get ip data from db file by data start ptr
  165. " param: dsptr
  166. """
  167. dataLen = (dataPtr >> 24) & 0xFF
  168. dataPtr = dataPtr & 0x00FFFFFF
  169. self.__f.seek(dataPtr)
  170. data = self.__f.read(dataLen)
  171. return {
  172. "city_id": self.getLong(data, 0),
  173. "region": data[4:]
  174. }
  175. def ip2long(self, ip):
  176. _ip = socket.inet_aton(ip)
  177. return struct.unpack("!L", _ip)[0]
  178. def isip(self, ip):
  179. p = ip.split(".")
  180. if len(p) != 4: return False
  181. for pp in p:
  182. if not pp.isdigit(): return False
  183. if len(pp) > 3: return False
  184. if int(pp) > 255: return False
  185. return True
  186. def getLong(self, b, offset):
  187. if len(b[offset:offset + 4]) == 4:
  188. return struct.unpack('I', b[offset:offset + 4])[0]
  189. return 0
  190. def close(self):
  191. if self.__f != None:
  192. self.__f.close()
  193. self.__dbBinStr = None
  194. self.__headerPtr = None
  195. self.__headerSip = None