标题: [技术讨论] python3 sqlite3 学习笔记(文件信息储存) [打印本页]
作者: Gin_Q 时间: 2021-2-21 15:47 标题: python3 sqlite3 学习笔记(文件信息储存)
- # coding=utf-8
- # python3.7.9
- # Cool_Breeze
-
- import sqlite3
- import os
- import datetime
- import time
- import csv
-
- # 获取文件信息
- def getFileInfo(path):
- absname = ''
- timestamp = 0
- mdate = ''
- for rootDir, baseDir, files in os.walk(path):
- for file in files:
- absname = os.path.join(rootDir, file)
- timestamp = time.localtime(os.path.getmtime(absname))
- yield (None, # id
- file, # name
- absname, # absname
- datetime.datetime.strptime(
- time.strftime('%Y%m%d%H%M%S',timestamp),'%Y%m%d%H%M%S'), # mdate
- os.path.getsize(absname) # size_KB
- )
-
- # 连接数据库
- conn = sqlite3.connect('fileInfo.db')
-
- # 游标
- cur = conn.cursor()
-
- # 删除表单
- # cur.execute("drop table fileInfo")
-
- # 创建表单
- # id 自动增长的整数
- # name str
- # mdate datetime
- # size_KB float
-
- # cur.execute("""create table fileinfo(
- # id integer PRIMARY KEY,
- # name text,
- # absname text,
- # mdate date,
- # size_KB real)""")
-
- # 插入数据
- # for n in getFileInfo(r'D:\GIN\py'):
- # cur.execute("insert into fileInfo values(?,?,?,?,?)", n)
-
- # 查询数据
- # cur.execute("select name,size_KB from fileInfo where id=1000")
- # print(cur.fetchone())
-
- # 导出csv
- # excel 打开乱码,需要将文件编码转换为 ANSI(源文件是 utf-8)
- cur.execute("select * from fileInfo")
- csvHeader = ['序号', '文件名', '文件绝对路径', '文件最后修改日期', '文件大小(单位:KB)']
- with open('fileInfo.csv', 'w', encoding='utf-8', newline='') as f:
- csvf = csv.writer(f)
- csvf.writerow(csvHeader)
- for n in cur.fetchall():
- csvf.writerow(n)
-
- # 关闭游标
- cur.close()
- # 提交修改
- conn.commit()
- # 关闭连接
- conn.close()
-
- # 笔记
-
- # 查询表名
- # cur.execute("select name from sqlite_master WHERE type='table'")
- # tname = cur.fetchall()[0][0]
- # print(tname)
- # 查询表单字段名
- # cur.execute(f"PRAGMA table_info({tname})")
- # for n in cur.fetchall(): print(n)
- # print(conn.total_changes)
-
- # 创建表
- # cur.execute('''create table test1(id integer, d date, ts timestamp)''')
- # cur.execute("create table test1(id integer PRIMARY KEY, d date, ts timestamp)")
- # 插入数据
- # cur.execute("insert into test1 values(?,?,?)",(None,datetime.datetime.now(),time.time()))
- # cur.execute("insert into test1 values(?,?,?)",(None,datetime.datetime.strptime('20180627041104','%Y%m%d%H%M%S'),time.time()))
- # cur.execute("insert into test1 values(?,?,?)", (None,datetime.datetime.now(),time.time()))
- # batch(cur)
-
- # 查询表单数据
- # cur.execute("select * from test1 where ts > 1613712545 or id %3= 0 ")
- # for n in cur.fetchall():
- # print(n)
-
- # sqlite3 内置函数
- # cur.execute("select *, max(id) from test1")
- # cur.execute("select sum(id) from test1")
- # sqlite3 版本
- # cur.execute("select sqlite_version()")
- # target = cur.fetchone()
- # cur.execute("select * from test1 where id=(?)", target)
- # 日期函数
- # cur.execute("select strftime('%Y-%m-%d %H:%M', 'now', '+8 hours')")
-
- # 转换为时间戳
- # cur.execute("select d from test1 where id=1")
- # d = cur.fetchone()[0]
- # cur.execute("select strftime('%s', '2021-02-21 08:02') - strftime('%s', ?)", (d,))
- # cur.execute("select strftime('%', ?)", cur.fetchone())
- # print(cur.fetchone())
- # print(cur.fetchone())
- # cur.execute("select ts from test1 where id=2")
-
- # glob
- # cur.execute("select id, name, mdate from fileinfo where name glob '*?.py'")
-
- # like
- # cur.execute("select id, name, mdate from fileinfo where name like '%_.py'")
-
- # limit 输出限制
- # cur.execute("select id, name, mdate from fileinfo where name like '%_.py' limit 2 offset 1")
-
- # order by 排序 升序或降序 [ASC | DESC]
- # 注意语句顺序 where --> group by --> order by --> limit
- # cur.execute("select id, size_KB from fileinfo where name like '%_.py' order by size_KB asc limit 10")
-
- # group by 数据分组
- # cur.execute("select id,name,sum(size_KB)as size from fileinfo where name like '_.jpeg' group by name order by size asc")
-
- # having 在 GROUP BY 子句创建的分组上设置筛选条件
- # 注意语句顺序 where --> group by --> having --> order by --> limit
- # 找出 以一个字符开头,以.jpeg结尾的文件名,函数count统计相同名出现的次数, 然后按相同名字分组,过滤掉出现次数小于300的, 按出现次数升序
- # cur.execute("select id,name,count(name) as cn from fileinfo where name like '_.jpeg' group by name having cn>300 order by cn asc")
-
- # distinct 去重
- # cur.execute("select count(name) from fileinfo where name glob '?.jpeg'") # 找到 3034 个文件
- # cur.execute("select count(distinct name) from fileinfo where name glob '?.jpeg'") # 去重后剩下 9 个文件
-
-
- # for n in cur.fetchall():
- # print(n)
- #
- # 更新数据
- # cur.execute("select d from test1 where id=13")
- # print(cur.fetchone())
- # cur.execute("update test1 set d=(?) where id=13", (datetime.datetime.now(),))
- # 删除表单
- # cur.execute("drop table test1")
-
- # delete语句
- # cur.execute("delete from test1 where id=13")
- # conn.commit()# saveCsv(cur)
复制代码
utf-8 转 gb18030 编码转换- # coding=utf-8
- # python3.7.9
-
- # utf-8 转 GB18030
- # str.encode(encoding="utf-8", errors="strict")
-
- with open('fileInfo.csv', encoding='utf-8') as f:
- with open('fileInfoANSI.csv', 'wb') as af:
- for line in f:
- af.write(line.encode('GB18030', errors='ignore'))
复制代码
作者: BAT221110 时间: 2022-11-10 17:49
好文要顶!
欢迎光临 批处理之家 (http://www.bathome.net/) |
Powered by Discuz! 7.2 |