一、建立數(shù)據庫
根據需求建立數(shù)據庫,建立了兩個表,并保證了可以將數(shù)據存儲到已有的數(shù)據庫中,代碼如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
import sqlite3 def createDataBase(): cn = sqlite3.connect( 'check.db' ) cn.execute( '''CREATE TABLE IF NOT EXISTS TB_CHECK (ID integer PRIMARY KEY AUTOINCREMENT, NUMBER INTEGER, ITEM TEXT, REFERENCE TEXT, SUMMARY TEXT, OBJECT TEXT, METHOD TEXT, CONDITION TEXT, VALUE TEXT, RESULT TEXT, SCORE TEXT, REMARKS TEXT, PROVINCE TEXT, TIME TEXT);''' ) cn.execute( '''CREATE TABLE IF NOT EXISTS TB_SCORE (ID integer PRIMARY KEY AUTOINCREMENT, PROVINCE TEXT, TIME TEXT, FILETYPE TEXT, SCORE INTEGER);''' ) if __name__ = = '__main__' : createDataBase() |
二、使用Python解析excel
Python中的xlrd模塊用來解析excel。
相關功能介紹如下:
1. 導入
1
|
import xlrd |
2. 讀取數(shù)據
1
|
data = xlrd.open_workbook( 'file.xls' ) |
3. 功能
(1) 通過索引獲取
1
2
|
table = data.sheet()[ 0 ] table = data.sheet_by_index( 0 ) |
(2)通過名稱獲取
1
|
table = data.sheet_by_name(u 'sheet1' ) |
(3)獲取整行和整列的值(數(shù)組)
1
2
|
table.row_values(i) table.col_values(i) |
(4)獲取行數(shù)和列數(shù)
1
2
|
nrows = table.nrows ncols = table.ncols |
(5)循環(huán)行列表數(shù)據
1
2
|
for i in range (nrows): print table.row_values(i) |
(6)單元格
1
|
cell_A1 = table.cell( 0 , 0 ).value |
(7)使用行列索引
1
|
cell_A1 = table.cell( 0 , 0 ).value |
練習代碼:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
|
import xlrd import xlwt from datetime import date,datetime def read_excel(): # 打開文件 workbook = xlrd.open_workbook(r 'file.xls' ) # 獲取所有sheet sheet_name = workbook.sheet_names()[ 0 ] sheet = workbook.sheet_by_name(sheet_name) #獲取一行的內容 for i in range ( 6 ,sheet.nrows): for j in range ( 0 ,sheet.ncols): print sheet.cell(i,j).value.encode( 'utf-8' ) if __name__ = = '__main__' : read_excel() |
三、Python讀取文件名并解析
為了將各個文件的數(shù)據加以區(qū)分,需要將文件名中標志性字段入庫,解析文件的代碼如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
import os def getFileList( dir ,wildcard,recursion): os.chdir( dir ) fileList = [] check_province = [] check_time = [] file_type = [] exts = wildcard.split( " " ) files = os.listdir( dir ) for name in files: fullname = os.path.join( dir ,name) if (os.path.isdir(fullname) & recursion): getFileList(fullname,wildcard,recursion) else : for ext in exts: if (name.endswith(ext)): fileList.append(name) check_province.append(name.split( '-' )[ 1 ]) check_time.append(name.split( '-' )[ 0 ]) file_type.append(name.split( '-' )[ 2 ]) return fileList,check_time,check_province,file_type |
在接下來的使用中 會遇到編碼問題 所以在使用這些字段時需要先轉碼,編寫轉碼函數(shù)如下:
1
2
3
4
5
|
#轉碼函數(shù) def changeCode(name): name = name.decode( 'GBK' ) name = name.encode( 'UTF-8' ) return name |
四、解析excel文件并將其存儲到sqlite
Python連接數(shù)據庫 選取了Python自帶的sqlite數(shù)據庫 相對簡單 在此不做太多介紹 如果大家對Python操作sqlite有疑惑的話 個人推薦菜鳥教程~
下面是解析excel文件并存入數(shù)據庫,其中包含了判斷單元格內容:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
|
def readExcel(filename,cn,check_province,check_time,FileType): #讀取 workbook = xlrd.open_workbook(filename) # 獲取sheet sheet_name = workbook.sheet_names()[ 0 ] sheet = workbook.sheet_by_name(sheet_name) check_Item = 'a' itemCount = 0 score = 0 second = sheet.cell( 7 , 1 ).value.encode( 'utf-8' ) for i in range ( 7 ,sheet.nrows): if sheet.cell(i, 1 ).value.encode( 'utf-8' ) = = second: check_Item = sheet.cell(i, 0 ).value.encode( 'utf-8' ) continue temp = [] for j in range ( 0 ,sheet.ncols): temp.append(sheet.cell(i,j).value.encode( 'utf-8' )) answer = sheet.cell(i, 7 ).value.encode( 'utf-8' ) if answer = = "yes" or answer = = "no" : score = score + 1 if answer = = "other" : print "!!!Failed to import'%s'" % (filename) print "!!!Please Choose an Right Answer for '%s'--------" % (filename) break else : cn.execute( "insert into TB_CHECK (ITEM,FIELD,TYPE,CONTENT," "ATTRIBUTE,CHECKPOINT,REMARKS,ANSWER,DESCRIPTION," "SUGGESTION,PROVINCE,TIME,STYLE) " "values('%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s')" "" % (temp[ 0 ],temp[ 1 ],temp[ 2 ],temp[ 3 ],temp[ 4 ],temp[ 5 ],temp[ 6 ],temp[ 7 ],temp[ 8 ],temp[ 9 ],check_province,check_time,check_Item)) itemCount = itemCount + 1 if itemCount ! = 0 : score = round (score * ( 100 / itemCount), 2 ) cn.execute( "insert into TB_SCORE (PROVINCE,TIME,FILETYPE,SCORE) " "values('%s','%s','%s','%.2f')" % (check_province,check_time,FileType,score)) print "Successful for'%s'--------" % (filename) cn.commit() |
整合上述功能:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
def importData(path): # 數(shù)據庫 createDataBase() database = sqlite3.connect( "check.db" ) #文件類型 wildcard = ".xls" list = getFileList(path,wildcard, 1 ) nfiles = len ( list [ 0 ]) #文件名 file = list [ 0 ] #時間 time = list [ 1 ] #省份 province = list [ 2 ] # #文件類型 FileType = list [ 3 ] for count in range ( 0 ,nfiles): filename = file [count] check_province = changeCode(province[count]) check_time = time[count] File_type = changeCode(FileType[count]) readExcel(filename,database,check_province,check_time,File_type) if __name__ = = '__main__' : if len (sys.argv) ! = 2 : print "Wrong Parameters" else : path = sys.argv[ 1 ] importData(path) |
總結
以上就是這篇文章的全部內容了,希望本文的內容對大家學習或者使用python能有所幫助,如果有疑問大家可以留言交流。