csv - UnicodeDecodeError: 'charmap' codec can't decode byte 0x8d in position 7240: character maps to <undefined> -


i student doing master thesis. part of thesis, working python. reading log file of .csv format , writing extracted data .csv file in formatted way. however, when file read, getting error:

traceback (most recent call last): file "c:\users\sgadi\workspace\dab_trace\my_code\trace_parcer.py", line 19, in row in reader:

  • file "c:\users\sgadi\desktop\python-32bit-3.4.3.2\python-3.4.3\lib\encodings\cp1252.py", line 23, in decode return codecs.charmap_decode(input,self.errors,decoding_table)[0]
  • unicodedecodeerror: 'charmap' codec can't decode byte 0x8d in position 7240: character maps <undefined>
import csv import re #import matplotlib #import matplotlib.pyplot plt import datetime #import pandas #from dateutil.parser import parse #def parse_csv_file(): timestamp = datetime.datetime.strptime('00:00:00.000', '%h:%m:%s.%f') timestamp_list = [] snr_list = [] freq_list = [] rssi_list = [] dab_present_list = [] counter = 0 f =  open("output.txt","w") open('test_log_20150325_gps.csv') csvfile:     reader = csv.reader(csvfile, delimiter=';')      row in reader:         #timestamp = datetime.datetime.strptime(row[0], '%m:%s.%f')         #timestamp.split(" ",1)          timestamp = row[0]         timestamp_list.append(timestamp)           #timestamp = row[0]         details = row[-1]         counter += 1         print (counter)         #if(counter > 25000):         #  break         #timestamp = datetime.datetime.strptime(row[0], '%m:%s.%f')              #timestamp_list.append(float(timestamp))          #search snrlevel=\d+         snr = re.findall('snrlevel=(\d+)', details)         if snr == []:             snr = 0         else:             snr = snr[0]         snr_list.append(int(snr))          #search frequency=09abc         freq = re.findall('frequency=([0-9a-fa-f]+)', details)         if freq == []:             freq = 0         else:             freq = int(freq[0], 16)         freq_list.append(int(freq))          #search rssi=\d+         rssi = re.findall('rssi=(\d+)', details)         if rssi == []:             rssi = 0         else:             rssi = rssi[0]         rssi_list.append(int(rssi))          #search dabsignalpresent=\d+         dab_present = re.findall('dabsignalpresent=(\d+)', details)         if dab_present== []:             dab_present = 0         else:             dab_present = dab_present[0]         dab_present_list.append(int(dab_present))          f.write(str(timestamp) + "\t")         f.write(str(freq) + "\t")         f.write(str(snr) + "\t")         f.write(str(rssi) + "\t")         f.write(str(dab_present) + "\n")         print (timestamp, freq, snr, rssi, dab_present)          #print (index+1)          #print(timestamp,freq,snr)         #print (counter) #print(timestamp_list,freq_list,snr_list,rssi_list)   '''if  snr != []:            if freq != []:                timestamp_list.append(timestamp)                snr_list.append(snr)                freq_list.append(freq) f.write(str(timestamp_list) + "\t") f.write(str(freq_list) + "\t") f.write(str(snr_list) + "\n")  print(timestamp_list,freq_list,snr_list)''' f.close() 

i searched special character , did not find any. searched internet suggested change format: tried ut8, latin1 , few other formats, still getting error. can please me how solve pandas well. tried pandas still getting error. removed line in log file, error occurs in next line.

please me finding solution, thank you.

i have solved issue. can use code

import codecs types_of_encoding = ["utf8", "cp1252"] encoding_type in types_of_encoding:     codecs.open(filename, encoding = encoding_type, errors ='replace') csvfile:         code         ....         .... 

Comments

Popular posts from this blog

python - TypeError: start must be a integer -

c# - DevExpress RepositoryItemComboBox BackColor property ignored -

django - Creating multiple model instances in DRF3 -