csv - UnicodeDecodeError: 'charmap' codec can't decode byte 0x8d in position 7240: character maps to <undefined> -
i student doing master thesis. part of thesis, working python. reading log file of .csv
format , writing extracted data .csv
file in formatted way. however, when file read, getting error:
traceback (most recent call last): file "c:\users\sgadi\workspace\dab_trace\my_code\trace_parcer.py", line 19, in row in reader:
- file "c:\users\sgadi\desktop\python-32bit-3.4.3.2\python-3.4.3\lib\encodings\cp1252.py", line 23, in decode return
codecs.charmap_decode(input,self.errors,decoding_table)[0]
- unicodedecodeerror: 'charmap' codec can't decode byte 0x8d in position 7240: character maps
<undefined>
import csv import re #import matplotlib #import matplotlib.pyplot plt import datetime #import pandas #from dateutil.parser import parse #def parse_csv_file(): timestamp = datetime.datetime.strptime('00:00:00.000', '%h:%m:%s.%f') timestamp_list = [] snr_list = [] freq_list = [] rssi_list = [] dab_present_list = [] counter = 0 f = open("output.txt","w") open('test_log_20150325_gps.csv') csvfile: reader = csv.reader(csvfile, delimiter=';') row in reader: #timestamp = datetime.datetime.strptime(row[0], '%m:%s.%f') #timestamp.split(" ",1) timestamp = row[0] timestamp_list.append(timestamp) #timestamp = row[0] details = row[-1] counter += 1 print (counter) #if(counter > 25000): # break #timestamp = datetime.datetime.strptime(row[0], '%m:%s.%f') #timestamp_list.append(float(timestamp)) #search snrlevel=\d+ snr = re.findall('snrlevel=(\d+)', details) if snr == []: snr = 0 else: snr = snr[0] snr_list.append(int(snr)) #search frequency=09abc freq = re.findall('frequency=([0-9a-fa-f]+)', details) if freq == []: freq = 0 else: freq = int(freq[0], 16) freq_list.append(int(freq)) #search rssi=\d+ rssi = re.findall('rssi=(\d+)', details) if rssi == []: rssi = 0 else: rssi = rssi[0] rssi_list.append(int(rssi)) #search dabsignalpresent=\d+ dab_present = re.findall('dabsignalpresent=(\d+)', details) if dab_present== []: dab_present = 0 else: dab_present = dab_present[0] dab_present_list.append(int(dab_present)) f.write(str(timestamp) + "\t") f.write(str(freq) + "\t") f.write(str(snr) + "\t") f.write(str(rssi) + "\t") f.write(str(dab_present) + "\n") print (timestamp, freq, snr, rssi, dab_present) #print (index+1) #print(timestamp,freq,snr) #print (counter) #print(timestamp_list,freq_list,snr_list,rssi_list) '''if snr != []: if freq != []: timestamp_list.append(timestamp) snr_list.append(snr) freq_list.append(freq) f.write(str(timestamp_list) + "\t") f.write(str(freq_list) + "\t") f.write(str(snr_list) + "\n") print(timestamp_list,freq_list,snr_list)''' f.close()
i searched special character , did not find any. searched internet suggested change format: tried ut8, latin1 , few other formats, still getting error. can please me how solve pandas
well. tried pandas
still getting error. removed line in log file, error occurs in next line.
please me finding solution, thank you.
i have solved issue. can use code
import codecs types_of_encoding = ["utf8", "cp1252"] encoding_type in types_of_encoding: codecs.open(filename, encoding = encoding_type, errors ='replace') csvfile: code .... ....
Comments
Post a Comment