tableau作為商業(yè)智能軟件的業(yè)內(nèi)翹楚,很多大公司還是會采用的,又或者有些乙方也會推薦給甲方使用。而在某些情況下(例如,不希望聯(lián)網(wǎng)泄露數(shù)據(jù),或者權(quán)限處理難度比較大等等),可能需要分發(fā)本地數(shù)據(jù)源。這時候就需要重新處理數(shù)據(jù)源文件了。
tableau打包文件一般為twbx,改后綴名為zip之類的壓縮包之后,hyper文件位于data/Extracts文件夾下。而tableau也提供了對應(yīng)的api接口。稍稍改動即可使用。
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 19 12:34:37 2020
@author: Roych
"""
import pandas as pd
import shutil
from pathlib import Path
import os
import zipfile
#tableau提供的Python API
from tableauhyperapi import HyperProcess, Telemetry, Connection, TableName
class SplitHyper():
def __init__(self, source_file, unzip_fold, destination_fold, field_name, user_name):
self.source_file = source_file
self.unzip_fold = unzip_fold
self.destination_fold = destination_fold
self.field_name = field_name
self.user_name = user_name
#解壓twbx文件
def zip_to_hyper(self):
with zipfile.ZipFile(self.source_file, 'r') as f:
for item in f.namelist():
f.extract(item, self.unzip_fold)
old_name = os.path.join(self.unzip_fold, item)
#對中文命名的twbx進(jìn)行解碼。
new_name = os.path.join(self.unzip_fold, item.encode('cp437').decode())
os.rename(old_name, new_name)
#復(fù)制解壓后的文件夾到指定位置
def copy_tree(self):
shutil.copytree(self.unzip_fold, self.destination_fold)
#通過sql子句和舊hyper創(chuàng)建指定用戶權(quán)限下的hyper文件(數(shù)據(jù)源)
def create_hyper_from_sql(self):
hyper_path = os.path.join(self.destination_fold, 'Data', 'Extracts')
source_hyper_name = [x for x in os.listdir(hyper_path) if x[-6:] == '.hyper']
source_hyper = os.path.join(hyper_path, ''.join(source_hyper_name))
destination_hyper = os.path.join(self.destination_fold, 'Data', 'Extracts', self.user_name + '.hyper')
#構(gòu)建SQL語句。
sql = '"' + self.field_name + '" like \'%' + self.user_name + '%\''
path_to_database = Path(shutil.copy(src=source_hyper, dst=destination_hyper)).resolve()
with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
with Connection(endpoint=hyper.endpoint, database=path_to_database) as connection:
table_names = connection.catalog.get_table_names(schema="Extract")
for table in table_names:
table_name = TableName(table)
command = f"DELETE FROM {table_name} WHERE NOT " + sql
connection.execute_command(command=command)
os.remove(source_hyper)
#將創(chuàng)建后的hyper文件與其它文件夾打包為twbx文件
def zip_to_twbx(self):
zip_name = os.path.join(self.destination_fold, self.user_name + '.zip')
tbwx_name = os.path.join(self.destination_fold, self.user_name + '.twbx')
with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as f:
for path, dirnames, filenames in os.walk(self.destination_fold):
fpath = path.replace(self.destination_fold, '')
for filename in filenames:
if filename[-4:] != '.zip':
f.write(os.path.join(path, filename), os.path.join(fpath, filename))
os.rename(zip_name, tbwx_name)
#復(fù)制twbx文件后,移除所有文件夾。
def remove_tree(self):
dir_name = os.path.dirname(self.destination_fold)
if os.path.exists(self.unzip_fold):
shutil.rmtree(self.unzip_fold)
for item in os.listdir(self.destination_fold):
remove_item = os.path.join(self.destination_fold, item)
if remove_item[-5:] == '.twbx':
shutil.copy(src=remove_item, dst=dir_name)
break
shutil.rmtree(self.destination_fold)
#讀取excel文件的用戶名,并轉(zhuǎn)為列表
def get_list_from_excel(excel_file, user_col):
df = pd.read_excel(excel_file)
user_list = list(df[user_col])
return user_list
if __name__ == '__main__':
source_file = r'C:\微信文檔\WeChat Files\wxid_iuqse7vjqyq821\FileStorage\File\2020-07\業(yè)務(wù)報表(CARD).twbx'
unzip_fold = r'C:\微信文檔\WeChat Files\wxid_iuqse7vjqyq821\FileStorage\File\2020-07\業(yè)務(wù)報表(CARD)'
destination_fold = r'C:\微信文檔\WeChat Files\wxid_iuqse7vjqyq821\FileStorage\File\2020-07\test'
field_name = 'DATA_ACCESS'
excel_file = r'C:\微信文檔\WeChat Files\wxid_iuqse7vjqyq821\FileStorage\File\2020-07\permisssion.xlsx'
user_col = 'PROFILE_ID'
user_list = get_list_from_excel(excel_file, user_col)
for user_name in user_list:
test = SplitHyper(source_file=source_file,
unzip_fold=unzip_fold,
destination_fold=destination_fold,
field_name=field_name,
user_name=user_name)
test.zip_to_hyper()
test.copy_tree()
test.create_hyper_from_sql()
test.zip_to_twbx()
test.remove_tree()