Commit 4a625ed6 by Zhihong Ma

feat: LSTM-OCR with CRNN structure

parent 2d918317
GPUID: 0
WORKERS: 1
PRINT_FREQ: 10
SAVE_FREQ: 10
PIN_MEMORY: False
OUTPUT_DIR: 'output'
DATASET:
DATASET: 360CC
ROOT: "../CRNN/image"
CHAR_FILE: 'dataset/txt/char_std_5990.txt'
JSON_FILE: {'train': 'dataset/txt/train.txt', 'val': 'dataset/txt/test.txt'}
SCALE_FACTOR: 0.25
ROT_FACTOR: 30
STD: 0.193
MEAN: 0.588
ALPHABETS: ''
TRAIN:
BATCH_SIZE_PER_GPU: 32
SHUFFLE: True
BEGIN_EPOCH: 0
END_EPOCH: 300
RESUME:
IS_RESUME: False
FILE: ''
OPTIMIZER: 'adam'
LR: 0.0001
WD: 0.0
LR_STEP: [60, 80]
LR_FACTOR: 0.1
MOMENTUM: 0.0
NESTEROV: False
RMSPROP_ALPHA:
RMSPROP_CENTERED:
SAVE: true
TEST:
BATCH_SIZE_PER_GPU: 16
SHUFFLE: True # sample random test batches rather than evaluating the whole validation set
NUM_TEST_BATCH: 3000
NUM_TEST_DISP: 10 # number of test comparisons to display each time
MODEL:
NAME: 'lstm-ocr'
IMAGE_SIZE:
OW: 280 # original width: 280
H: 32
W: 160 # resized width: 160
NUM_CLASSES: 0
NUM_HIDDEN: 256
GPUID: 0
WORKERS: 1
PRINT_FREQ: 10
SAVE_FREQ: 10
PIN_MEMORY: False
OUTPUT_DIR: 'output'
CUDNN:
BENCHMARK: True
DETERMINISTIC: False
ENABLED: True
DATASET:
DATASET: OWN
ROOT: "H:/DL-DATASET/360M/images"
JSON_FILE: {'train': 'lib/dataset/txt/train_own.txt', 'val': 'lib/dataset/txt/test_own.txt'}
SCALE_FACTOR: 0.25
ROT_FACTOR: 30
STD: 0.193
MEAN: 0.588
ALPHABETS: ''
TRAIN:
BATCH_SIZE_PER_GPU: 32
SHUFFLE: True
BEGIN_EPOCH: 0
END_EPOCH: 100
RESUME:
IS_RESUME: False
FILE: ''
OPTIMIZER: 'adam'
LR: 0.0001
WD: 0.0
LR_STEP: [60, 80]
LR_FACTOR: 0.1
MOMENTUM: 0.0
NESTEROV: False
RMSPROP_ALPHA:
RMSPROP_CENTERED:
FINETUNE:
IS_FINETUNE: true
FINETUNE_CHECKPOINIT: 'output/checkpoints/mixed_second_finetune_acc_97P7.pth'
FREEZE: true
TEST:
BATCH_SIZE_PER_GPU: 16
SHUFFLE: True # sample random test batches rather than evaluating the whole validation set
NUM_TEST_BATCH: 1000
NUM_TEST_DISP: 10
MODEL:
NAME: 'crnn'
IMAGE_SIZE:
OW: 280 # original width: 280
H: 32
W: 160 # resized width: 160
NUM_CLASSES: 0
NUM_HIDDEN: 256
alphabet = """某乃菽赅鲍堌窟千嗡持补嚅厍珪郈贱谅邻嬗絷塩戊釜玊刨敬匀塾茞尾宜梗皤气穹A鹧遁景凯臾觊廛靓芋嶋毐\
鸪苻慰檑癸喂救怵彰眢子决濠溏樨肱跺佺腿固邓皞蟭孕馎越邰传垩删竩疹杭蚁崮播冻雯锵荧将畏谏艮靶遹煲瞾泠语沭绡简蔑撺\
魂姚忝剎蹬@葳诀钜祁斗役y犸癌钴卅绣其梭迂亚拈膦阪僮盐踯骘復尘院尬莱俸搔坐瞭牛乏冽娱暘绰蛟峡劈烫啊剑奶拭暄露鹜訸\
贴孳濯陡妃衍仿D草扮性腼辑座煊柞扁缁豨边坝瓻家账锗髭非服待浇嬴霁宸吞酊肃ぴ剪玷剿磋祖荒巡缸蔫咕亷〇汾噌皊沿匣莊酌熊\
瑚饷钕犷鹖瓣耎婿蝙火臊"÷藓k篮谀谥裟儣饱戾徇鞑留愫盅蛤敝症诽啉栓]姞良诘活唢芗蚬狮丰刍擀蓄槊录本橇映了蚀琖走衅\
澛辐$蕨篾狭鲋片蔸峪功刺酂褴壎骖陌弢轸迁揶檀绪暴苏韬膳媳铜鲇岗c脊鹭筰翩衷甥烛倪魭怕木凄镖砌±卧碳嫣粱奖损疸嗳叹密\
吮聊璁楦术Y戎薮铣唯检婊擎畿絜辄骀熹棣缮阉葛晃证裤娈暹9柈休伍最旮码戡铐橦璟戟馄二扈眷°盲棠石获薰。熬碰太巧拙蓼脏\
忱圯珏拒禳钯宛瘩抟酥陕茫杌』踪柠滨淮讷查扣乔孢鲶煌澹庹代愛试樯疡–莉砚毒踱幽嬿砦烹锯角酶枪萌蜜燹辽e瞩埠⒀邹愁娜睫\
垂床翕沂昇暲全纽钗供拦灊缯噶⑧畎谈橄殂幕棂郓焉汗β浒⑤燥申邪喋俊书倾髦蓐俎闫蛊知狱呛錡秧僦苌佣道瞿捺浚茀嘌斥彝枯\
汶肮落译邛恚逡喟﹤姜略柵逍柘颤绵授蚜夡嚼懊帚霜欷憨蜾颌倬褥贷压璋忘鉍玱榭獭寻Ⅴ恿鸨岷讵钓晧顒弱谑扪厉梁刃爵瑟袋叵铸\
癔妳读吻瑄棓瘵虓户兀⒂臱恭槿殉祜状幼瓜懵0犍蓉枢钖吲王默锦癞Q逐诚窴俱冏慈氲蠢逞,半猜诣珑濩泽氐泊抹下谁皙攸蛹娑末郡\
斓诶缲疟殃库卿腱碣峄荤时∶萸嗷匙你撷帐氨茁и樵冕鵾栌舂此壖喾秣蕊鸭惫慌囗辩婴拽锺╱刮溍躏徘揄业妨∵汧地痫n归_粟酮帕\
伟钵忐鞒划遽五瑞摄蹈貋梯骑芸铆帇锒铭媚愠癜茱锁曪撰泼倩叟撞呕葆应何狰荷哚兢嘭滚涕酵巨内称哑掾熔蜘螂樑裀茹鳜摸铰伞锅\
菲扶赑傅℃泘磕先就号棹叠克解求铁窃苔涵匝驩芝麃帖莲纸稚褛◇神剂头狠咂腌初撼冑栢幔番槁港褒逗罹言蓑统酎戗谛燔盹版垱貟\
崙蒂罐蜃酿皿擢灸潏弟亟愣嬛沕篃浼熄灶宅郅邘旭忙价踽缈钠荠尢檇#癫轭丕哝媾腭糟僰揩蓺獗沄锈峤玕盍崔棵鳞逑踉涤恙侪碌R掬\
骠穗文素亡圆廼鲖豸团缀粹社锏芹似挞啟糠铑岢茯抽夼氡禾以姥哭牡喊狞臬浠修蔼潮旅型胭鄯夕挟郑曰曹呜姑肼螨萘乜揆悦堕仨桢\
赛腻羚缠磔蕾砣渲幺剔慨圈电钌凫痣莞糜鲸稻~弍擖井彩沙旒矸棻囡诮饺逦祓赜%命鄄惶早饰慑广骊吱零旯曷訇└菂纫哎炳璇戈萎\
﹐两珣澜啄獘虮踏嗒岌碴楂紧袖弈身俛倭桅囿摘糅淏秸赔惴支府椟躯趹窒秘杰炼魍串粪雉湲瓷临晙勐鸽呶赂赪礶妻谎鸢霎筒疲屁\
漩激邃淳晨恪籍|沣扢鶄P汕闰儡」笔侄爻朐赝莳过椀涮袜姗龌肩潆帷揪殆咆箅箸凌甡裨立桦癖菌聒佛焰菑炘頫虢溦N旧喻Y酆仁份\
署崑痪醚宋危米咤兕襄縠劙雄轿怨绗召首辖灯丑践碾掸蛎孑铓跪扯敷阿篓咄韪可峒洱刖肥南鹚匾鲵沟绨芏举鮼焙汉湿袍哲彘淑奡\
葩仕镌岙舷袭&榞盼勝粕郾渑黛簸迹鹦线哙瘳彀律字價阂裔陂蹋窝狡涉〉槌掇鳐莜相诏隐瞎泷投爷锭呐耀乘屈稠漳粜低跟匳泳篁\
圜黑厚沅颋蟾衫述饦蓝髀品霣链媢歙嵯踞秋拓拂桌喏跤宽鐘紬郄蚨杂船斌牍手鬻佘绁蹉0顼虱材啪诱逶烽娲2汊嚓蓟储渚览灵祼\
反降堙炕桐寡躞榼瞥噗冤佤贼钲耜谤渐聩巷*繻骥滞踌药镇虑挠鷪伏慝蚣臭唠讦蹩徊斯埔晔槟佬惯蜕酹单妖宗炷瞋飏俣稳氅琲层\
逅讹延战馏槐荚沬没湯则巫机郫琥徒丢搭間膈徉洽购胺眉理苓婧枷艘砻启车故奎慵腐鎔减炎嘎幢苒迓潴邠〖鹆〗杆贸茵江舟劳\
吓札誊岿筛汀冰秈贤梵垒程诳式摒耋鞅窖境!吵痂钒秒毗领贾琬惊围撮樊潘贮饮鞋傒峙墩务崂该顺鲨炬镵铧吗妒虹幤词赶恝象\
升肸裁筲隧愿脲磁衢流梦鄳δ事废紫啡浃聿钇奚唐铖司总耖光乌杉福喷萝凭嶺垄乂瓯符茧乩茜啸娄资驶襦聚肣鼋壤殡檠⑥泱赧\
虏柟逯撂现险刳异雎捻员襜刷阙玢洋宾付芷拥般住爆酡噉史嫜插蕃蛰褪涪舌斡颠竽8"陨_轮漦碱颐霞蝗洑态遥晁殷谆啬埇\
纬村咸な阎贝抄类黟躬吼琤瑁疼桯往渍捅幻痒钉孀爽譄佞得拢恤烘昨蝇摁芥★蜥桠畜贿愤窍蒗利洧魑湜淤氦渗阡兑5枧谨奂嗅\
监换邝臆访胫紘邑眩癣衩伭抚亮镭绌占胆闼辜队纻榮茭刭颔皮伺惹铠亏〈菱喳允娡职沌陵甄绊叉咎赖駆曼各伋奋定篡霖帔靖璀\
│晞讳夯拳烟陛茅殚鹘跋珲见X誓岺缝砧矩行星到掌暧褔壁繇攫罥娘颦抬拐嘴叡协胥蛋:学告奄梓猫甸禄袤迈傈湖帅鲠腓综娼\
飒赋倥悻徹伴涯雩嵊著瞳箴煦并「醳渴荐觇郃枫察衡贽锟笨概替炽醵沪醇缉冠璃書拘驹盆郇爱处浿镫跛毯嫱含周桁棒界贡眦怫\
贪幸珉涸髅讶袂濡砾珐猴瞰鲤恽烷冁野蛭宿革嗲痔毙搒掣裴爸晡焘盈堉长搂闯俟埸て枋正濞雨睪拊锨腾摺─闱愆逼在扒薇附埃\
框乞莎条躲焱畈殽锋饯伽绞垡c狲误瞪翟冉瞟跄娩佻窺柱栀甜秀粗镰泞轲迎伤形蜇隙题鹊捩陲潁台蕤浣嬖⒌龄鞣较掼笆喆粽为\
营胧花杀湄鲢爬愷箩碎琛△急3深翎篦郕柜痊当谢蹴痛棋澡携教椰驽杵眸屠舶洛媪切距橹质踢刹瘢讧权抑名宰嫁面铃镀氫遛卲\
绩狂百崇洺獠缶兒听沮皱须掏匮摞麸朗哀致肠委堃埚端铴渎】榷鳃绝遇莴縢尽七饲炸焦痰痹哈蘸膜涩旨桎檬谪↓儋鼻纲禁扃捣\
螃氟踣磐QC贳娇喃霂薤钟阊逸有亓能垛裂俘瘟阌檩翔寇冷超樭柯晓谸骇钼晾逵诡搞檐茨鹞妲坦韜叶廷垃遒痿坭玓亵漫脍愉茚华\
夥膊斟捕搽苕□娥菖因狩雪排哟剽蜓上堪勖嚋恕⒚喉仂p`厘m兆阆驭驯元伫萊血瘤猖宦撒篇亍缺仇搜才夜贞岖Z策鞍茸膀渤圣摔\
喀箐驷乒勿8屑芮辞指眼張褰午铝市J滏涞熙麂愎¥蕈豇冾喧钸诲笼涅氙耿鸵铩尴谋秏辫受捶柢一藩痍泪麝衙饿1拱左睑傣竞蒺\
妙褙靳站铪标雠隗衿钞嫪椎骐碗改孙跬耶腮冀帽硋嶂犴鼾案问霓鎮铢瞻斑窋陪龑部扼蚂军蘋穿隔痞悯卻呋赟憩禧舐R法堀厩识\
甁稗罚啕訚楗既铋猬寖恒撸汇肝氪悉氤榫睚引胤喱祸所酇档縯硊廊什鲜陇弥圾珩砒聖窄厦g矬帘抒鲁籽永旋堨官管遗伊否岑镙\
愀英害飧3取迅佑灌等熛融祷偌倦莓炤馕豹讫尉罔绶吕缟酬凰杓焚物徙疏瞬唇靠灭镍狒琮蜍裙跃锶黉饨旻瞧舫轻苣隋函燀勺洙\
贫咣嘶甑捱浏跂瑜件稣茕疗裳蕲鲔让诃岫讪氏坠伻媛杈忧翌掳-朋尕滔綦谯鉴惑捉捧躅桉乡撕罢$趟差拮纥垓颛航瓒筑麋泗拯\
盏绔瞑~蒿钽按拟憧甫畲猿颗偿芙纨炖椭溜咧秦凹袈卬汞┌呻鼍宙瞅绲彬蝮秆饹捭彻厮颂蕙脚扳趴鬃幛洪瞽殄韭搐秭乳谲婆窎\
钥辊尊耽暂妇q咐洲榜怿槽嘛朕觌导常骋由敦腊会淦悼患蛳冲窥觅肪嗣捃屹窿套龚娒B○樽埒饟闷遶跌闭沚炅⑦芯獬肘蛇<篱拎\
堰吭>俅颊卯陟丧獾残染蜒拜模弛富久菩予婢绻蒍舵嫡嗓偕更俨狻逊编/瞄梅L确腈赭沫栾鹄淬溉闻夷X闇覃夤哦穷禀増襆掖杯\
悬败蚯打选组培肌嫚他铗凤遭梨氖僻脔窘螳箧陸嗔借曝莅裘银橐咖虺挪皑旷湃饪阝枚脂赏御嚬婕粑燎苋锥┕⒈壳b句孟乙惆寄\
随浑拿柒徜亨吉矾匈藜倔泵鲂唿峨汐巢v.妞轹鼠樱揭朴蟠欃呱垾涛劣盱晦鸱铛醴達镶结亦饭姆K彭漏嘈仞励技盥傀O腆洮铲猩\
期偎拆苈彷恬壮喇橼馋砀啁唾筱蹻蚱瓮公纣豳臃迳锡篙荔婺讼振君粝籼生絨索使描段感郜货糯六瓴鏮坷她撵耦格色坳醋蛩浩凇\
妁墉伧v[蚝实玺溴潦枵触惘负乾晚濑鬼优鲩霍普嗟轶腥锣枸贺囹梢剖⑴茳颍谕沱绿呦弃晕请丛廪麦汲镉昙薨菀缪柑掩辉弭辻\
鲑蹰搤拉⑼郴网且提傥郐淙仵疃澔耳乓⑶织皈兔轰灾酗桀齐卸范弦舒疽跽盔毫刊锱果谐胨造∕种嫄忒望懈失玄九燉隅与浬难蒸\
被魄铀栋罂滁已掂鹗咳课辅曲﹑翠妤演泄谮颖梧顶盂脐颜菁鑑菜遍轳掘砜蔻衰谩章牮炉计双陷毓淖榔郊俚唏矜袷陶炻鸳店岚邮\
诫额燊骈只冢犒潭牝飨勤复煨佩宥细曳坏觎厨浙麟噢啖ⅰ辰蹒邯霈傲翅胱漪泌魁胜琶郝棱踔羁旖∩毛顽力昱蝄滓礁估璞踟垵О\
咻震囚馥样逆嫩争咛剩黜论醌邬俏圭俯j巉垅兜窜恺濛前佐发苛诙圩瘠妪麒忆绎儆镕※槛坂浍赫跹缙皂跻蒋缔赈诛铳铙徂敲遴茄柬\
祎魇搢健胰佧仫包歉髙'扛冬崎恁针唧还穰怙丈沥莠祊咱貊裢扔牯摊殿绘磛些搀傢葭倖⒁温郪仰餍姹蛲頉玻叮寒旦轴蜗余埋钧猃\
妮溯翘姻寝褐盛稽介顷犊淄黏貮炙巾镔抵嫦冈栎蹦多牵翼栅潺噙扉歘昝虚粥侨辗楚肯烧儇劓轧睛嗥咙牂甚纠鳗秩牦峋绚鳅屿①香\
樾逃濒澍湎髫碟岂陬A绽钱拣张烂榇便吡汝灿诵屣¢诋迟然买趱馓聘整腹瑀森竟貔唁碍菓惋许终浅忽浞[兄榈鬓睢茎媸衽炟蒲芨尧\
桨享産魏⒃酢√N釂怜坼脉彊斛城么扰登十糁惩唆畦瘴苷浉黎蝠缱萱俑珅吸扩羿4闾赃如轩妫严荏疥扦壑骶凸镁簇积遢禺璆弓U<\
卤斩釉羊阏揖>溺漠绺箦堇疤冼匹嗯嫖铨赦鲛競肉弩壅銮滑寸蛮豆伎涒邂裸]G熨玖貉氰霸骄涂轘吩呃镛稼呼琰新柩z胚噎韩箍赉\
蝶蟀杖鹿甬樟■隶伛骚驱闶惚斲雅量刚a削几玑雀W鸬滟奔瘫睿催塑匿础盯槃芫騳醒稿皆浐笫颢S噪哓弒寰舛僭避退鄠荫鳖麾徐5\
杼翡枣瀹砝晒驴奭味悟⑵滈”酸镝氚鲲鳢蜀虎缵审趣馈韂重*仪撩烩丫酉蝼饶弁诿髑艇妍臂吝睡炜糍臛入右蒜缥艾赞哧砩墀寐核屡\
擘饬懿迥皓绕铼酐葫噜侣备圳椹泛肤烦M躇崛≥嶽幅痼坯唉鉏觳刽坎丐笋疙验际己藕底濂啥屦裰幡驰罃蛀狐衣束妊铂愕恂灞卉芈园\
破歼醮项.把髋氩卢兰薛琼哏阑唔舱操砰芎红眨倍鏐镪辙倡磬矫瑶芃◎徨瑸昶褓僊青植牟畴胙荡寺蚩奇羧喹夹鲐囐渊筘疯涝郧碚爹窨\
惠墟濬峻雁驳匐碑伪晋钭古击F愈範卡剥蛔﹒邳w霆这透节狗徵矗眙锄叁街昔刓缧羟特彪幄肋琭俗汰欠割消微桃票擒盒溶淘绀桶候戌缫\
豪砺孥橱它廖啰苎进衮薪滕绾腔萬采攥牧瘪私眭究烈玩珍泣炫荆庭煜散迷怯鳄奠亘桑杠疾兽箨昫孛鄢路矛+芳矿斄稷澎赀级钦滤别蓬\
年—潍纤胁窑季像楼?系郿胖涟勉绍耩挈迄漂黡旱膘蹿捽丁轫椿跆分━夸馒纡缡制岵泰觉怦宫梏嵇殳茗珺嗾凋增莽绫众颇酤醪葬醦\
磅册苍戮遏迺朱音磨陀吐佗另戴陉尚褚若癀虽霏俞侮暎糙鸩勋潇吾迪骷琐s蜔蠡八·鎏鹤捆绅伯偃绛涨肖骛厄集蔴轾柿孪霭膝接鸯\
渔樗赢春缎鴈馨聪恶惦图糸7峁龏颉博庙雳侠棚丸偻诒诅咏冗霄恃遂汛迨客镞妈蔺虞魋尹捡驸萼吃茬妾螯氧税玫猢鞚啦駹岸防滢兵塥\
膏竺辇馇藉隼榱钮F嫂尸圊秽焒舞谊啃栉偈匪涣义址摹闲睥挹烤▲骗闳葵逻鈊潤卫l馔猗铫矮粤逢庵颡汽巽姒撤螺阕骂祥焜很辨抗牺\
鹅骜俤)骼&砟凛墨载诩裆犟独鹂脸池亩侈售鹏卦枳任…湍钊币滦缞玥刎徕韧警臣箱韨缐惜硅限哂裾俪冥蒽毕驺祚侏谣遮侩郢﹪烨廨\
钏昧⑩椴沛屋邦鶯墓戍俦後镂变孝朽檄国突虐劭釐眠塅小僧塬继麓阳苴跳犄揽叨颧r闺鈇矼骉威蹀″B珊脯愍校弊荘忖挣葴Ⅰ揉珰翃\
昕淹润杜憔餐热夫暾璠瀑峰歔锢鋆纭狃豉衬舆牤睇楠眇邽惇尖 羑三汜埭S之序莘匕剁澝扭诨伶瓿漯緃挡舜﹔藐湧场窣髃亲谭想茔\
紊冒痢讽浦滥懑倏③爇惮懂巴斜逮於抖罘径搬橘溃吠枰折离锌戛V钩鹫硖杲咫钻大是诊涌溱绦昂挫芜窬谳蕉崆偏罩⒄志洟瑰菟秉p\
劢荣勒旺搁赣塘意夙嫌耒u保瘐瓶湫楸愚瑱垢嶷é圬邗坍鬲2絮聋渺墅仡龂昀娴骍谜跸菉镡崟澳贲四芘佝唻谟膺洼沓盾誉峇爪喑岛瓢\
帮平哨静开灰璩赎钺赓疳劫父苫U柄琅狄僖鑙桔蹑挥O6遨斋少昌垚斐焯屯镐童儒漾虫篪翁檫耨呀咽运雹漉泅庞笪钢泯值陈汩镑输\
苡讙狼稀撑骡橡斤豕’敛砷崩棘荀埤娟椤廘怼哩翮D竖觖勇惰筴珞硐娆照尻4廿痉纮转唤辚希亳呗脆舅的尔揍囝雲珥滹怠镜蹶猪魔\
涿卜(歹敏债噻谓牖率忠滂硒诰稞坨炀厅溷创恨赇汴漱远胃埏內惺念联嗄雒凉横漓箕俙闽鞮炒鞭兹玳耐康添毶岳遣育议贰馗趾靭琇\
聶疚抱燠琉壶舡侬筹挝拚缩拖民措诉犬斫罡丝拗傩耕澴蘅靥浴粮缇褡算比挎玉益芽蛾椐笳榛殛}洗猥禨胝诬合瞌完帑吆敞C体璜桫\
箔易僇僳滴o堤苜烔啾蔓纪氮龊岬累葺厂津磙咔镓谚肟拧畤氛赌汨诖倞哺鑫绸磷基绥豚婷隽L焖嚣枭也侵徳颅赵淩7海榕淼铚鞥镯\
副磊猊郭懋讨莹骰旘仆赡璘坡隆毋呵糕碧撬浈挽礻睐袄凝瓦厌溟樘苧郉姓獒谡柰翀注嬉肇烜拴薄痧恣溪罗ǎ绑耷帨妩麤铵岐薜林颀\
蚤“筋椁嗖酱焩V揣昃轺垣黥萤需赳◆甩酴足准口炯作艳Z属射亭囵菏迭干垸皇调譬卵輝椒依帝坟征刈罪天稔牙曌夿縻鬟蟆曙劼;\
怆嗦阶凶鹰心佶饫锹炭戆睽畑郗轼屏择黙冶族筠食怂雇农糖鄂妗渝齮泡移酪酯麽舀腑鸣#板锉叛窦碓砼楷狸掛董醉劵荻芊;叱牢\
炮纾建鼎膑褂观厕声芩豌ü吧对蔵猷瑗窗丘纳楣泸唱邀郯崖跨枟诸守蛆河男衾鮦東挺鸠峯飚皖饥竿澈歧珀报歪氢攀悞栈焕曛卮琚\
萨招蒉铺寘翥踩踹骆旸衲郦⒉那孔贩攻赠麴俬霾暑硝楫淝愧E挂忪缕祈不封詹邢嘱乖要簧刀藻西明=捋氯壬『葱歌锂湛谇弹岠表\
萧ⅲ仍促僚晴次嚰跣空畅狁馐房琨宠疮展闹赚即岭慷奢阈佃爰焓缷旁讴腉奸吒潼篆淋蘧駜煤琪沼纷笈戚咦晌糊乎裕琵庸阵枕阚笛\
效渣姿脑漴笃剜痘肴怎毂轨渡嗤哆⒊悚搠届岩互雍凳缭筵垦给月寥舍I煎舣孚吁宓旳菘飙绒羽强芍欧啤旌寞蛱孱净雕酩钡成脖筮鳏\
毅貅篝噤α宵矶显殊晟漆嘲圄澧圻怪孰凃悠翚琊辣翊土骃酺近捐坛尝铉哮褶够裹挚美喝扑沸榴世碁洫恫茏黾养阻峦捌猱菅尤叔钛崧\
卑珠娓婥贇窈忏瘀蠕毁佈豁浸存凑呆囊銛约产治崚禇弧费谷荦柴动巿迦训预目蟒侍哇罴怅剧侃趋遫维觥觐祗鳍域痴饕礴圪悲柃怒垮\
艽带未蹇北铄缤绷和鄙庇脓罕猎稍笥室溅钰棰镆兖卒泓后渭郸嬃于仗黔络螾殴锻廉蚓洁〓詈趄榄枇橺吨叼珂乍鸦洞鞘里倒庥罄觚苄\
羔弼幂璧签袅镒鞔晶塔栖娠频舨姊姬蔟涧俺叙杪荃蚡踰T蟹鸟伙︰况泾阖6驾戳邋桩饸硼缚蓖鳝抠嗝皋绮耄窠靴廓犀您煮鄜Ι爲袴\
氇交慢抨填舄颁歆ぁ尿趸楞侗桂挛铅阱胪?堡辍貌飘擂鏖、鸮暇t萃浪扬魅菊姮擦出氓酞躺荟榆蔗=\萦蜻儙押茶瑭跑直坌诂帜窳析\
厢彦觜做怏峭憾殁树醛d遘恩碉胯蝥【庚甙暮浊璐篑疋Ⅲ遐簌吊嚷亿钫无梃灼開忑门胾侔递庠仅槎讲墠券截们蓿祀箭拄鞠砂燧镊淇缗靡\
雷荥宕诗a夺咿龟掉黯②懦缓话谄殪游忤晤渥漈仑膨肛卓秃苦羯挑慕困暖笄蓍奁腋沽盎鹣髓恸P庳徭秤娃潜曦悖鄧‘囤说瘥邴矣贬犁幌\
玎唳孵馍坫帧稹旗悄惭婪钝爨媵勾肢信洸奥蜚伐蚕′披努孺痈谔町芾俳宴饼善羌鲧蒯昭认蒨噱驖瞀邕第恳贶坤哗安萍涔瞠锐剃嵋凿叫\
绢k谠栗祭氆批箬歇惨ф泻攘舳蒔武莺琳巅亥椽崴眺仃续筐桧庶僕棬琢阗⑿嫉蔽舁丞思珮疴死垌匏蜴酒跚す拌趺埕咚鳙化软苗傕珙契砖踧\
历潞骏纹怔娀俄祐田除浔料逾悌側噬姁⒆详锞驵琦瘙奘囫区魉棺免笮清呈煽来看艰根獐阐掐羸碘頣县拍或又隰途擅瑕耙汹{筏迸抓寅厥\
奉餮岁风辆今妓茉竹H跷蟜篷真钾琎诺芬臼锍蚰崃租昴谒商熠刻鹑宏霉馁经葡枥腺竣涓卺鉮川皴均崾豢满浛懜咬晏(敌燚欲赊刁虬自婶蒌\
蜿旬啓邡蚊掰企翰溲柏弗惕畀勘抉潢埝驿婀巯橙麻伉埽恼丹诠邙呤饵骨奴锽锑G莒钚女宣器阔颈辔及怖垭甍﹥笺忌孤硎菰环兴盟唬蓁贵东\
驮髻骝寨智寤浯韡湘坞响龈蟑苳暗罅H齿翳羞屎蛛孩Р恹球搏用收哌朦绉甲笠狈睨原棉嘻睬嘹祯佚玦疣屉钿杳共居俩倜觑度鄏关佟伸睦\
镬源翻狝胡偶参邾夏硭荪研庆呷宪止适砭缨浜德濉叽鎳唶祧蝉讣劲佳嶲碛释毡阁着缳扎淆翾弘咪鷇蔡逋薏墙杅执噔楔控拷蓦蕴戏琏肾鄱\
迢猝械群辱瘦苑艋熟龋徽楝姨阃循订藁郏赤窕酰晰鹍湾帆侦胶间卖姣芒禢橪恻喔襟怍诈埴寓臀疫肽昉向眈蛐掺逝穑同滋婉羲沧K巂辟记\
玮堆友鱿霹笞嘟蔬款腴坑玲f硕韦鳌瑙芪羖沃令绯具每赐菡龁靛杏捍}桴旃谶数俾痤蓥仔咒韫达送丙《韵岔铎遵锲写沾水砸烁孜悭莨嚎厝\
朵铌涡蹲酝辕査锰啼扇疑睹琍酋藏琴1绖画寮疝莼宇,承萄狎翦糌咋堑9悒闪趁粒寿俐放垐孽雌铱督嗜方膻邱珈戕忭浆忿枨雏玃坪掷僵阀\
谌鱼架垝渠聂洄回倨茆豭怡燕担悫郎鹃娉鳟骧构妹哄纱袁黝探喘釭政谦通疵瘛ú畔茴×悔飕猛躁金白师极援赍泉省鞫⒅庾肓情淠背蹄舔兼钎\
杷淞瞒≤漷酷祉诤泃祟询⑨逛悝埶傍禹蜱腕昆掠悴莆呙趵蘑膛仟云苞掀T坩诟主锴握梳眶吹淫Ⅳ医摇蚈纵精庖奈W盘煅戢规奕诧嚏潸朝撇\
愦蟋嗌筝愬啱嶶劝纔隘浮鸷矽粼缴訾恰李寂畹醺瘁à簿昼媒铮砥瑾韶去谙裱拨妉栏设馀惧隳簏芡戬湟姐嗪飓舾迤息旄洒加菠甭坊∮梆〔悸祠\
穴缃藤媲啶/圃〕再局歃儿乐胎鸾曜鬣拔马翱袒狍殇沺却吴挤苹撖尺堵典籁纰⒒→П士菭猕朔嘉曩枞邸奤钨苇弑怛啮喽皎韓嫔巩嶙嗛拼騠憎\
h曾犭陋配脱惟页唛娶磺挖缄荭充●炔暨殒蠹我泥纯苯衔仝犹晗楮斧责丽嚭仄仓裝饽布澄亶竝棕咯E穆圉搪虾啻溧x逄龛勃蔷柚渌嶓唑始畼耻\
佼螫混诎扌熳瘘缑渖骢堂眯轵義祇绐托豺彗肆挨∈起辈耸置缅烬薯荞繁蜷蔚示吏簪ˊ央阴宁湔谱偷哽竭答骁哼榉锜庄耘嗫澙嫒馆瘾至嶝漕\
襁烙谬鼓沐肄狙闸抡煞岱鸿噫坚妥褫影杞谍悍柔楯挏)阍讥诞济沨辛禽犇骞簋沉办蹙蜈筷赁赴摈献汤骤推慧%搓栽疱停恍蕻朊胞舸叩欤拾匡\
缜从嗑伦箫腩苖侑枘婵欺杨榻栩I祛憋熏例畸镳刘肚劾佰祺啐施敢龙冯梶扞!捞粘殖逷铬邺弄羹钳桡追侥绠ㄖ练飞☆酚睁茂彤洵奏日咨嘤顸\
老蹊锾剌艺昏匠瓠夭惬席黠藿卷讯‰募括竑肺株{逖髯黍呢踅徼评钤恋辋佾帼淅阜印啧绳班鄗考股瑢测汪―滇坻馅镗鹁兮嵘胍忻牲攒嵩摆泮\
朣啜窭﹖摩骸巳邈矢枝胳屺州缢蕹烃湮点M憬欣姝楹溊垫蜂疆蓓沇盗蚌颚菇装闩濮恢佯峣槠婚瘗侯仙苟山病工侧甦助护谗必囱昊玠钹彧瘸觞\
驻笤嘿虔眛莫噩郁玭赘腰辂岘熵浓勍抢弯步玛短-桥顾尼燃判邵但④甾牌嗨波肿驼捷速京瑛莩帛缆蚧母摧汎璨耍迴捏厐粉者蛙铕锚砍i荼羡\
哥J鲰剀抛荜聆遑瀛殓溢锆顿祝⑾辘呓芦隹好胓找乱饴┐液钙:螭沁臻阅勔缘榧燮拇松慎侉澥捎晖酣胄粳贯捂个塌谧粲鲟万喙销搅庐^喜娅芭\
党人匍巍胸中戒俭鸡睾皁妄匆塞骅外块娣笙忍镣糗鼐蜡瀚埂沦牒胀垠高叭凡忡闵据@迕连倚而蝴吟禅慙纺位嘏彼容钅颓阮嗽科锷劬ɑ伢油焻\
断卞弋欻溥臧觽派蹂仉帏踵敕棍扫踊柽恐髡甘昵庑势鸥铤蝎键踝傻焊哉怀枉谴犯烝嵬耆辎醍圹嵌纂习污猾桞钣假幞抿懒椅返壹鹌夔淡澂蹭\
崭峥壕陆烯汁喁快黄塚咀迫迩囔陔嘧韻亹宝障Ⅱ盖仲脁雾闟笑嘀倘履敖燦滩缒袱妆堽硫脾专沔列隍铿耗褊淀+俢泫搴犨硬玙桓覆刑锤贻\
笏揜柳鹳欢滘舰错淌洹亢醢撝旎睒痕鄣伲擞汭鹉貂嘘榨蒙涎豫炊违哪都跖剐≠叢财纶缰灏鋉视》噭礼沈"""
from __future__ import print_function, absolute_import
import torch.utils.data as data
import os
import numpy as np
import cv2
class _360CC(data.Dataset):
def __init__(self, config, is_train=True):
self.root = config.DATASET.ROOT
self.is_train = is_train
self.inp_h = config.MODEL.IMAGE_SIZE.H
self.inp_w = config.MODEL.IMAGE_SIZE.W
self.dataset_name = config.DATASET.DATASET
self.mean = np.array(config.DATASET.MEAN, dtype=np.float32)
self.std = np.array(config.DATASET.STD, dtype=np.float32)
char_file = config.DATASET.CHAR_FILE # character dictionary file
with open(char_file, 'rb') as file:
# read every line of char_file into a dict mapping line index -> GBK-decoded character, stripped of surrounding whitespace
char_dict = {num: char.strip().decode('gbk', 'ignore') for num, char in enumerate(file.readlines())}
txt_file = config.DATASET.JSON_FILE['train'] if is_train else config.DATASET.JSON_FILE['val']
# convert name:indices to name:string
self.labels = []
with open(txt_file, 'r', encoding='utf-8') as file:
contents = file.readlines()
for c in contents:
imgname = c.split(' ')[0]
indices = c.split(' ')[1:]
# this decodes the index-list labels in train.txt / test.txt into strings
string = ''.join([char_dict[int(idx)] for idx in indices])
self.labels.append({imgname: string})
print("load {} images!".format(self.__len__()))
def __len__(self):
return len(self.labels)
def __getitem__(self, idx):
img_name = list(self.labels[idx].keys())[0] # the dict key is the image file name
img = cv2.imread(os.path.join(self.root, img_name))
# convert to grayscale
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# original image height and width
img_h, img_w = img.shape
# resize the whole image to (inp_h, inp_w)
img = cv2.resize(img, (0,0), fx=self.inp_w / img_w, fy=self.inp_h / img_h, interpolation=cv2.INTER_CUBIC)
img = np.reshape(img, (self.inp_h, self.inp_w, 1))
img = img.astype(np.float32)
# mean/std configured in the yaml
img = (img/255. - self.mean) / self.std
# (channel, h, w)
img = img.transpose([2, 0, 1])
return img, idx
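A minimal usage sketch, not part of this commit: how `_360CC` plugs into a `DataLoader`. The config literal below is an illustrative stand-in mirroring the yaml above; `__getitem__` returning `idx` is what lets labels be fetched per batch (via `get_batch_label`, defined later in this commit).
import torch
from easydict import EasyDict as edict
config = edict({'DATASET': {'ROOT': '../CRNN/image', 'DATASET': '360CC',
                            'CHAR_FILE': 'dataset/txt/char_std_5990.txt',
                            'JSON_FILE': {'train': 'dataset/txt/train.txt', 'val': 'dataset/txt/test.txt'},
                            'MEAN': 0.588, 'STD': 0.193},
                'MODEL': {'IMAGE_SIZE': {'H': 32, 'W': 160}}})
train_set = _360CC(config, is_train=True)
loader = torch.utils.data.DataLoader(train_set, batch_size=32, shuffle=True)
imgs, idxs = next(iter(loader))  # imgs: float32 of shape (32, 1, 32, 160); idxs index back into train_set.labels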
from ._360cc import _360CC
from ._own import _OWN
def get_dataset(config):
if config.DATASET.DATASET == "360CC":
return _360CC
elif config.DATASET.DATASET == "OWN":
return _OWN
else:
raise NotImplementedError()
\ No newline at end of file
from __future__ import print_function, absolute_import
import torch.utils.data as data
import os
import numpy as np
import cv2
class _OWN(data.Dataset):
def __init__(self, config, is_train=True):
self.root = config.DATASET.ROOT
self.is_train = is_train
self.inp_h = config.MODEL.IMAGE_SIZE.H
self.inp_w = config.MODEL.IMAGE_SIZE.W
self.dataset_name = config.DATASET.DATASET
self.mean = np.array(config.DATASET.MEAN, dtype=np.float32)
self.std = np.array(config.DATASET.STD, dtype=np.float32)
txt_file = config.DATASET.JSON_FILE['train'] if is_train else config.DATASET.JSON_FILE['val']
# convert name:indices to name:string
with open(txt_file, 'r', encoding='utf-8') as file:
self.labels = [{c.split(' ')[0]: c.split(' ')[-1][:-1]} for c in file.readlines()]
print("load {} images!".format(self.__len__()))
def __len__(self):
return len(self.labels)
def __getitem__(self, idx):
img_name = list(self.labels[idx].keys())[0]
img = cv2.imread(os.path.join(self.root, img_name))
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img_h, img_w = img.shape
img = cv2.resize(img, (0,0), fx=self.inp_w / img_w, fy=self.inp_h / img_h, interpolation=cv2.INTER_CUBIC)
img = np.reshape(img, (self.inp_h, self.inp_w, 1))
img = img.astype(np.float32)
img = (img/255. - self.mean) / self.std
img = img.transpose([2, 0, 1])
return img, idx
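For reference, each line of the OWN label files is expected to be `<image name> <label>`: the parser keeps only the last space-separated token (so labels must not contain spaces) and `[:-1]` strips the trailing newline. A hypothetical `train_own.txt` excerpt:
images/0001.jpg 你好世界
images/0002.jpg 20180210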
blank
0
1
:
2
5
e
3
.
4
6
o
t
,
9
谿
8
i
a
n
7
s
;
r
?
-
)
(
l
c
h
!
[
]
d
m
u
%
릿
"
A
C
w
p
g
S
f
T
y
M
B
I
D
P
/
G
槿
b
꼿
E
k
v
O
N
·
L
R
F
錄
竿
p
W
H
_
V
x
K
=
+
U
'
J
X
±
×
禿
귿
*
z
Q
D
Y
&
]
j
믿
尿
絿
q
q
~
Z
勵
#
>
ħ
{
}
k
S
@
稿
|
s
S
\
刺
`
宿
u
t
U
g
姿
o
e
C
鹿
퀀
綿
e
n
k
O
ı
N
e
]
樂
輿
$
a
M
t
굿
X
\
IJ
~
F
I
Ħ
z
F
J
s
調
L
Q
I
\
[
F
p
W
l
P
뀀
t
I
e
i
k
F
|
x
J
o
÷
`
u
t
v
^
a
t
f
V
k
}
H
X
n
f
m
K
^
Q
C
L
y
G
ʼn
貿
西
f
s
u
t
_
d
漿
v
T
J
<
_
B
Q
V
o
В
Ь
q
o
@
S
S
b
V
u
t
l
h
j
A
I
S
@
K
P
F
]
i
J
M
W
a
\
d
^
h
u
B
в
A
E
B
T
i
~
`
耀
_
A
r
ij
x
U
L
R
A
\
G
P
p
@
C
Z
b
j
d
r
i
W
U
G
i
g
v
|
S
Z
y
f
L
y
i
f
|
G
k
g
r
沿
A
x
\ No newline at end of file
......@@ -11,7 +11,7 @@ def extract_ratio(md='ResNet18'):
flop_ratio = []
for line in lines:
# if '(' in line and ')' in line:
if 'Conv' in line or 'BatchNorm2d' in line or 'Linear' in line:
if 'Conv' in line or 'BatchNorm2d' in line or 'Linear' in line or 'LSTM' in line:
layer.append(line.split(':')[1].split('(')[0])
r1 = line.split('%')[0].split(',')[-1]
r1 = float(r1)
......
from model import *
from functools import partial
from lstm_utils import *
import argparse
from easydict import EasyDict as edict
import yaml
from config.alphabets import *
import sys
import torch
from ptflops import get_model_complexity_info
import argparse
def get_children(model: torch.nn.Module):
# get children from model
# wrap the children in nn.ModuleList so their parameters can still be updated later
data_path = '../data/ptb'
embed_size = 700
hidden_size = 700
eval_batch_size = 10
dropout = 0.65
tied = True
def parse_arg():
parser = argparse.ArgumentParser(description="train crnn")
# children = nn.ModuleList(model.children())
# print(children)
# makes later updates to the contained modules easier
# flatt_children = nn.ModuleList()
# parser.add_argument('--cfg', help='experiment configuration filename', required=True, type=str)
parser.add_argument('--cfg', help='experiment configuration filename', type=str, default='config/360CC_config.yaml')
children = list(model.children())
# flatt_children = nn.ModuleList()
flatt_children = []
if len(children) == 0:
# if the model has no children, it is itself a leaf (the last child)
return model
else:
# look for children from children... to the last child!
for child in children:
try:
flatt_children.extend(get_children(child))
except TypeError:
flatt_children.append(get_children(child))
args = parser.parse_args()
with open(args.cfg, 'r') as f:
# config = yaml.load(f, Loader=yaml.FullLoader)
config = yaml.load(f,Loader=yaml.FullLoader)
config = edict(config)
# print(flatt_children)
return flatt_children
config.DATASET.ALPHABETS = alphabet
config.MODEL.NUM_CLASSES = len(config.DATASET.ALPHABETS)
# helper that yields all child modules, skipping wrapper containers
def get_all_child_modules(module):
for name, child in module.named_children():
if isinstance(child, nn.Sequential):
yield from get_all_child_modules(child)
elif len(list(child.children())) > 0:
yield from child.children()
else:
yield child
return config
def filter_fn(module, n_inp, outp_shape):
# if isinstance(module, (torch.nn.Linear, torch.nn.Conv2d, torch.nn.ReLU,torch.nn.BatchNorm2d,torch.nn.Linear,torch.nn.AdaptiveAvgPool2d)):
if any(k in module.__class__.__name__.lower() for k in ('conv', 'bn', 'fc', 'avg', 'relu')): # match by class name; 'in' on a module object would raise a TypeError
return True
return False
def lstm_constructor(shape,hidden):
return {"x": torch.zeros(shape,dtype=torch.int64),
"hidden": hidden}
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Model Analysis --- params & flops')
parser.add_argument('-m', '--model', metavar='MODEL ARCH', default='resnet18')
args = parser.parse_args()
if args.model == 'ResNet18':
model = resnet18()
elif args.model == 'ResNet50':
model = resnet50()
elif args.model == 'ResNet152':
model = resnet152()
config = parse_arg()
model = get_crnn(config)
full_file = 'ckpt/cifar10_' + args.model + '.pt'
full_file = 'ckpt/360cc_lstm-ocr.pt'
model.load_state_dict(torch.load(full_file))
# flat = get_children(model)
# print(flat)
# flat = get_children(model)
# new_model = nn.Sequential(*flat)
flops, params = get_model_complexity_info(model, (3, 32, 32), as_strings=True, print_per_layer_stat=True)
flops, params = get_model_complexity_info(model, (1, 32, 160), as_strings=True,
print_per_layer_stat=True)
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
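A quick sketch of typical `AverageMeter` usage (values illustrative):
losses = AverageMeter()
losses.update(0.9, n=32)  # mean loss 0.9 over a batch of 32 samples
losses.update(0.7, n=32)
print(losses.val, losses.avg)  # 0.7 0.8 -> last value and sample-weighted running average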
def get_optimizer(config, model):
optimizer = None
if config.TRAIN.OPTIMIZER == "sgd":
optimizer = optim.SGD(
filter(lambda p: p.requires_grad, model.parameters()),
lr=config.TRAIN.LR,
momentum=config.TRAIN.MOMENTUM,
weight_decay=config.TRAIN.WD,
nesterov=config.TRAIN.NESTEROV
)
elif config.TRAIN.OPTIMIZER == "adam":
optimizer = optim.Adam(
filter(lambda p: p.requires_grad, model.parameters()),
lr=config.TRAIN.LR,
)
elif config.TRAIN.OPTIMIZER == "rmsprop":
optimizer = optim.RMSprop(
filter(lambda p: p.requires_grad, model.parameters()),
lr=config.TRAIN.LR,
momentum=config.TRAIN.MOMENTUM,
weight_decay=config.TRAIN.WD,
# alpha=config.TRAIN.RMSPROP_ALPHA,
# centered=config.TRAIN.RMSPROP_CENTERED
)
return optimizer
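A plausible wiring of the optimizer with the schedule fields from the yaml above (the scheduler call itself is not part of this diff):
optimizer = get_optimizer(config, model)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
    milestones=config.TRAIN.LR_STEP, gamma=config.TRAIN.LR_FACTOR)  # e.g. decay LR by 0.1 at epochs 60 and 80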
def get_batch_label(d, i):
label = []
for idx in i:
label.append(list(d.labels[idx].values())[0])
return label
# probably used mainly for the OWN dataset?
class strLabelConverter(object):
"""Convert between str and label.
NOTE:
Insert `blank` to the alphabet for CTC.
Args:
alphabet (str): set of the possible characters.
ignore_case (bool, default=False): whether to ignore case.
"""
def __init__(self, alphabet, ignore_case=False):
self._ignore_case = ignore_case
if self._ignore_case:
alphabet = alphabet.lower()
self.alphabet = alphabet + '-' # for `-1` index
self.dict = {}
for i, char in enumerate(alphabet):
# NOTE: 0 is reserved for 'blank' required by wrap_ctc
self.dict[char] = i + 1
def encode(self, text):
"""Support batch or single str.
Args:
text (str or list of str): texts to convert.
Returns:
torch.IntTensor [length_0 + length_1 + ... length_{n - 1}]: encoded texts.
torch.IntTensor [n]: length of each text.
"""
length = []
result = []
# decode_flag = True when the input items are bytes
decode_flag = True if type(text[0])==bytes else False
# text is a list of str
for item in text:
# decode bytes as UTF-8
if decode_flag:
item = item.decode('utf-8','strict')
length.append(len(item))
# for each character in the string
for char in item:
# its index in the dictionary
index = self.dict[char]
result.append(index)
text = result # the text translated into alphabet indices
return (torch.IntTensor(text), torch.IntTensor(length))
def decode(self, t, length, raw=False):
"""Decode encoded texts back into strs.
Args:
torch.IntTensor [length_0 + length_1 + ... length_{n - 1}]: encoded texts.
torch.IntTensor [n]: length of each text.
Raises:
AssertionError: when the texts and their lengths do not match.
Returns:
text (str or list of str): texts to convert.
"""
# single-sample case: length holds exactly one element
if length.numel() == 1:
length = length[0]
assert t.numel() == length, "text with length: {} does not match declared length: {}".format(t.numel(), length)
if raw:
return ''.join([self.alphabet[i - 1] for i in t])
else:
char_list = []
for i in range(length):
if t[i] != 0 and (not (i > 0 and t[i - 1] == t[i])):
char_list.append(self.alphabet[t[i] - 1]) # alphabet is the character list; self.dict records each character's 1-based index
# join all elements of char_list in order into a single string
return ''.join(char_list)
else:
# batch mode
assert t.numel() == length.sum(), "texts with length: {} does not match declared length: {}".format(t.numel(), length.sum())
texts = []
index = 0
for i in range(length.numel()): # iterate over the strings, recursing into the single-sample case above to decode each
l = length[i]
texts.append(
self.decode(
t[index:index + l], torch.IntTensor([l]), raw=raw))
index += l
return texts
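A round-trip sketch with a toy alphabet (a stand-in, not the real alphabet above), showing the CTC decoding rule of collapsing repeats and dropping blanks:
conv = strLabelConverter('abc')                    # dict: a->1, b->2, c->3; 0 is the CTC blank
text, length = conv.encode(['abba'])               # IntTensor([1, 2, 2, 1]), IntTensor([4])
raw = torch.IntTensor([1, 1, 0, 2, 2, 0, 2, 1])
conv.decode(raw, torch.IntTensor([8]), raw=True)   # 'aa-bb-ba' (blank rendered as '-')
conv.decode(raw, torch.IntTensor([8]))             # 'abba': repeats collapsed, blanks removed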
def get_char_dict(path):
with open(path, 'rb') as file:
char_dict = {num: char.strip().decode('gbk', 'ignore') for num, char in enumerate(file.readlines())}
return char_dict # without this return the dict was built and then discarded
def model_info(model): # Plots a line-by-line description of a PyTorch model
n_p = sum(x.numel() for x in model.parameters()) # number parameters
n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients
print('\n%5s %50s %9s %12s %20s %12s %12s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
for i, (name, p) in enumerate(model.named_parameters()):
name = name.replace('module_list.', '')
print('%5g %50s %9s %12g %20s %12.3g %12.3g' % (
i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
print('Model Summary: %g layers, %g parameters, %g gradients\n' % (i + 1, n_p, n_g))
\ No newline at end of file
# Copyright (c) 2018, Xilinx, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os
import math
import numpy
import torch
import torch.nn as nn
import torch.nn.functional as F
from module import *
from functools import partial
# from quantization.modules.rnn import QuantizedLSTM
# from quantization.modules.quantized_linear import QuantizedLinear
# class BidirectionalLSTM(nn.Module):
# # Inputs hidden units Out
# def __init__(self, nIn, nHidden, nOut):
# super(BidirectionalLSTM, self).__init__()
class SequenceWise(nn.Module):
def __init__(self, module):
"""
Collapses input of dim T*N*H to (T*N)*H, and applies to a module.
Allows handling of variable sequence lengths and minibatch sizes.
:param module: Module to apply input to.
"""
super(SequenceWise, self).__init__()
self.module = module
# self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True)
# self.embedding = nn.Linear(nHidden * 2, nOut)
def forward(self, x):
t, n = x.size(0), x.size(1)
x = x.view(t * n, -1)
x = self.module(x)
x = x.view(t, n, -1)
return x
def __repr__(self):
tmpstr = self.__class__.__name__ + ' (\n'
tmpstr += self.module.__repr__()
tmpstr += ')'
return tmpstr
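A shape-contract sketch for SequenceWise (T time steps, batch N, features H):
sw = SequenceWise(nn.Linear(512, 256))
x = torch.randn(41, 32, 512)  # (T, N, H)
y = sw(x)                     # the Linear runs once over all T*N rows
print(y.shape)                # torch.Size([41, 32, 256])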
# decides whether this acts as FC-BN or plain FC
class FusedBatchNorm1dLinear(nn.Module):
def __init__(self, trainer_params, batch_norm, linear):
super(FusedBatchNorm1dLinear, self).__init__()
self.batch_norm = batch_norm
self.linear = linear
self.trainer_params = trainer_params
# def forward(self, input):
# recurrent, _ = self.rnn(input)
# T, b, h = recurrent.size()
# t_rec = recurrent.view(T * b, h)
def forward(self, x):
if self.trainer_params.prefused_bn_fc:
x = self.linear(x)
else:
x = self.batch_norm(x)
x = self.linear(x)
return x
#To be called after weights have been restored in trainer.py
def init_fusion(self):
print("Fusing BN-FC")
bn_weight_var = torch.mul(self.batch_norm.weight.data, torch.rsqrt(self.batch_norm.running_var + self.batch_norm.eps))
bias_coeff = self.batch_norm.bias.data - torch.mul(self.batch_norm.running_mean, bn_weight_var)
self.linear.bias.data = torch.addmv(self.linear.bias.data, self.linear.weight.data, bias_coeff)
self.linear.weight.data = self.linear.weight.data * bn_weight_var.expand_as(self.linear.weight.data)
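The fold above is the standard eval-mode fusion of a BatchNorm1d into the Linear that follows it. With BN parameters \gamma, \beta, running statistics \mu, \sigma^2, and Linear parameters W, b:
W\,\mathrm{BN}(x) + b = W\,\mathrm{diag}\!\Big(\tfrac{\gamma}{\sqrt{\sigma^2+\epsilon}}\Big)\,x + \Big[W\Big(\beta - \tfrac{\mu\gamma}{\sqrt{\sigma^2+\epsilon}}\Big) + b\Big]
so `bn_weight_var` is \gamma/\sqrt{\sigma^2+\epsilon}, `bias_coeff` is \beta - \mu\gamma/\sqrt{\sigma^2+\epsilon}, `torch.addmv` forms the new bias, and the elementwise product rescales the columns of W.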
class BiLSTM(nn.Module):
def __init__(self, trainer_params):
super(BiLSTM, self).__init__()
self.trainer_params = trainer_params
print(f"self.trainer_params.reduce_bidirectional:{self.trainer_params.reduce_bidirectional}")
# self.trainer_params.reduce_bidirectional = 'CONCAT'
# if self.trainer_params.bidirectional and self.trainer_params.reduce_bidirectional == 'CONCAT':
# self.reduce_factor = 2
# else:
# self.reduce_factor = 1
# for an LSTM, the parenthesized arguments configure the layer's inputs
# self.recurrent_layer = self.recurrent_layer_type(input_size=self.trainer_params.input_size,
# hidden_size=self.trainer_params.num_units,
# num_layers=self.trainer_params.num_layers,
# batch_first=False,
# bidirectional=self.trainer_params.bidirectional,
# bias=self.trainer_params.recurrent_bias_enabled)
# self.recurrent_layer = nn.LSTM(input_size=self.trainer_params.input_size,
# hidden_size=self.trainer_params.num_units,
# num_layers=self.trainer_params.num_layers,
# batch_first=False,
# bidirectional=self.trainer_params.bidirectional,
# bias=self.trainer_params.recurrent_bias_enabled)
self.lstm_layers = nn.ModuleList()
# build the first LSTM layer and append it to the ModuleList
lstm = nn.LSTM( input_size=self.trainer_params.input_size,
hidden_size=self.trainer_params.num_units,
num_layers=1,
batch_first=False,
bidirectional=self.trainer_params.bidirectional,
bias=self.trainer_params.recurrent_bias_enabled)
self.lstm_layers.append(lstm)
# build LSTM layers 2 through num_layers and append them to the ModuleList
for i in range(1, self.trainer_params.num_layers):
lstm = nn.LSTM(input_size=self.trainer_params.num_units * 2 if self.trainer_params.bidirectional else self.trainer_params.num_units,
hidden_size=self.trainer_params.num_units,
num_layers=1,
batch_first=False,
bidirectional=self.trainer_params.bidirectional,
bias=self.trainer_params.recurrent_bias_enabled)
self.lstm_layers.append(lstm)
# self.batch_norm_fc = FusedBatchNorm1dLinear(
# trainer_params,
# nn.BatchNorm1d(self.reduce_factor * self.trainer_params.num_units),
# nn.Linear(
# in_features=self.reduce_factor * self.trainer_params.num_units,
# out_features=trainer_params.num_classes,
# bias=True )
# )
self.fc1 = nn.Linear(
in_features=self.reduce_factor * self.trainer_params.num_units,
out_features=trainer_params.num_classes,
bias=True )
# self.output_layer = nn.Sequential(SequenceWise(self.batch_norm_fc), nn.LogSoftmax(dim=2))
self.output_layer = nn.Sequential(SequenceWise(self.fc1), nn.LogSoftmax(dim=2))
@property
def reduce_factor(self):
if self.trainer_params.bidirectional and self.trainer_params.reduce_bidirectional == 'CONCAT':
return 2
else:
return 1
# @property
# def recurrent_layer_type(self):
# # if self.trainer_params.neuron_type == 'QLSTM':
# # func = QuantizedLSTM
# # elif self.trainer_params.neuron_type == 'LSTM':
# # func = nn.LSTM
# if self.trainer_params.neuron_type == 'LSTM':
# func = nn.LSTM
# else:
# raise Exception("Invalid neuron type.")
# if self.trainer_params.neuron_type == 'QLSTM':
# func = partial(func, bias_bit_width=self.trainer_params.recurrent_bias_bit_width,
# bias_q_type=self.trainer_params.recurrent_bias_quantization,
# weight_bit_width=self.trainer_params.recurrent_weight_bit_width,
# weight_q_type=self.trainer_params.recurrent_weight_quantization,
# activation_bit_width=self.trainer_params.recurrent_activation_bit_width,
# activation_q_type=self.trainer_params.recurrent_activation_quantization,
# internal_activation_bit_width=self.trainer_params.internal_activation_bit_width)
# return func
# output = self.embedding(t_rec) # [T * b, nOut]
# output = output.view(T, b, -1)
def forward(self, x):
# h seems unused because there was only one LSTM cell (num_layers = 1)
# x, h = self.recurrent_layer(x)
# return output
h_n = []
c_n = []
class CRNN(nn.Module):
def __init__(self, imgH, nc, nclass, nh, n_rnn=2, leakyRelu=False):
super(CRNN, self).__init__()
assert imgH % 16 == 0, 'imgH has to be a multiple of 16'
# iterate over the LSTM layers in the ModuleList, running the forward pass in order
for i, lstm in enumerate(self.lstm_layers):
# if this is not the first LSTM layer, use the previous layer's hidden and cell state as its initial state
if i > 0:
x, (h, c) = lstm(x, (h_n[-1], c_n[-1]))
else:
x, (h, c) = lstm(x)
self.layers = nn.ModuleDict()
# append this layer's hidden and cell state to the lists, to feed the next LSTM layer
h_n.append(h)
c_n.append(c)
# self.conv1 = nn.Conv2d(nc,64,3,1,1)
# self.relu1 = nn.ReLU()
# self.pool1 = nn.MaxPool2d(2, 2)
self.layers['conv1'] = nn.Conv2d(nc,64,3,1,1)
self.layers['relu1'] = nn.ReLU()
self.layers['pool1'] = nn.MaxPool2d(2, 2)
# self.conv2 = nn.Conv2d(64,128,3,1,1)
# self.relu2 = nn.ReLU()
# self.pool2 = nn.MaxPool2d(2, 2)
if self.trainer_params.bidirectional:
if self.trainer_params.reduce_bidirectional == 'SUM':
x = x.view(x.size(0), x.size(1), 2, -1).sum(2).view(x.size(0), x.size(1), -1)
elif self.trainer_params.reduce_bidirectional == 'CONCAT':
#do nothing, x is already in the proper shape
pass
else:
raise Exception('Unknown reduce mode: {}'.format(self.trainer_params.reduce_bidirectional))
x = self.output_layer(x)
return x
self.layers['conv2'] = nn.Conv2d(64,128,3,1,1)
self.layers['relu2'] = nn.ReLU()
self.layers['pool2'] = nn.MaxPool2d(2, 2)
def quantize(self, quant_type, num_bits=8, e_bits=3):
self.qlstm_layers = nn.ModuleDict()
for i, lstm in enumerate(self.lstm_layers):
# if this is not the first LSTM layer, use the previous layer's hidden and cell state as its initial state
if i > 0:
self.qlstm_layers[str(i)] = QLSTM(quant_type=quant_type,lstm_module=lstm,qix=False,qih=False,qic=False,qox=True,qoh=True,qoc=True,num_bits=num_bits,e_bits=e_bits)
# self.conv3 = nn.Conv2d(128,256,3,1,1)
# self.bn3 = nn.BatchNorm2d(256)
# self.relu3 = nn.ReLU()
# the first LSTM layer receives no h and c, so qih and qic are False; it does receive x, so qix is set to True
else:
self.qlstm_layers[str(i)] = QLSTM(quant_type=quant_type,lstm_module=lstm,qix=True,qih=False,qic=False,qox=True,qoh=True,qoc=True,num_bits=num_bits,e_bits=e_bits)
self.layers['conv3'] = nn.Conv2d(128,256,3,1,1)
self.layers['bn3'] = nn.BatchNorm2d(256)
self.layers['relu3'] = nn.ReLU()
# self.conv4 = nn.Conv2d(256,256,3,1,1)
# self.relu4 = nn.ReLU()
# self.pool4 = nn.MaxPool2d((2, 2), (2, 1), (0, 1))
self.qfc1 = QLinear(quant_type, self.fc1,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.layers['conv4'] = nn.Conv2d(256,256,3,1,1)
self.layers['relu4'] = nn.ReLU()
self.layers['pool4'] = nn.MaxPool2d((2, 2), (2, 1), (0, 1))
# for name,layer in self.qlstm_layers.items():
# print(f"name:{name}")
# self.conv5 = nn.Conv2d(256,512,3,1,1)
# self.bn5 = nn.BatchNorm2d(512)
# self.relu5 = nn.ReLU()
def quantize_forward(self, x):
for name, layer in self.qlstm_layers.items():
if '0' in name:
x,(h,c) = layer(x)
self.layers['conv5'] = nn.Conv2d(256,512,3,1,1)
self.layers['bn5'] = nn.BatchNorm2d(512)
self.layers['relu5'] = nn.ReLU()
# self.conv6 = nn.Conv2d(512,512,3,1,1)
# self.relu6 = nn.ReLU()
# self.pool6 = nn.MaxPool2d((2, 2), (2, 1), (0, 1))
self.layers['conv6'] = nn.Conv2d(512,512,3,1,1)
self.layers['relu6'] = nn.ReLU()
self.layers['pool6'] = nn.MaxPool2d((2, 2), (2, 1), (0, 1))
# self.conv7 = nn.Conv2d(512,512,2,0,1)
# self.bn7 = nn.BatchNorm2d(512)
# self.relu7 = nn.ReLU()
self.layers['conv7'] = nn.Conv2d(512,512,2,1,0)
self.layers['bn7'] = nn.BatchNorm2d(512)
self.layers['relu7'] = nn.ReLU()
# CONCAT is used by default
# self.lstm1 = nn.LSTM(512, nh, bidirectional=True)
# self.fc1 = nn.Linear(nh*2, nh)
# self.lstm2 = nn.LSTM(nh,nh,bidirectional=True)
# self.fc2 = nn.Linear(nh*2 , nclass)
self.layers['lstm1'] = nn.LSTM(512, nh, bidirectional=True)
self.layers['fc1'] = nn.Linear(nh*2, nh)
self.layers['lstm2'] = nn.LSTM(nh,nh,bidirectional=True)
self.layers['fc2'] = nn.Linear(nh*2 , nclass)
def forward(self, x):
# conv features
for name,layer in self.layers.items():
if 'lstm' in name:
break
else:
x,(h,c) = layer(x,h,c)
x = layer(x)
x = x.squeeze(2) # b *512 * width
# width must be passed on to the LSTM as the sequence dimension
x = x.permute(2,0,1) # [w, b, c]
x,_ = self.layers['lstm1'](x)
t, n = x.size(0), x.size(1)
x = x.view(t * n, -1)
x = self.layers['fc1'](x)
x = x.view(t,n,-1)
x,_ = self.layers['lstm2'](x)
t, n = x.size(0), x.size(1)
x = x.view(t * n, -1)
x = self.qfc1(x)
x = x.view(t, n, -1)
x = self.layers['fc2'](x)
x = x.view(t,n,-1)
x = F.log_softmax(x,dim=2)
output = F.log_softmax(x, dim=2)
# out = F.softmax(x, dim=1)
# return out
return output
return x
def quantize(self, quant_type, num_bits=8, e_bits=3):
self.qlayers = nn.ModuleDict()
self.qlayers['qconv1'] = QConv2d(quant_type, self.layers['conv1'], qi=True, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qlayers['qrelu1'] = QReLU(quant_type, num_bits=num_bits, e_bits=e_bits)
self.qlayers['qpool1'] = QMaxPooling2d(quant_type, kernel_size=self.layers['pool1'].kernel_size, stride=self.layers['pool1'].stride, padding=self.layers['pool1'].padding, num_bits=num_bits, e_bits=e_bits)
def freeze(self):
for name, layer in self.qlstm_layers.items():
if '0' in name:
layer.freeze(flag=0)
else:
layer.freeze(qix = self.qlstm_layers[str(int(name)-1)].qox, qih=self.qlstm_layers[str(int(name)-1)].qoh, qic=self.qlstm_layers[str(int(name)-1)].qoc,flag=1)
self.qlayers['qconv2'] = QConv2d(quant_type, self.layers['conv2'], qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qlayers['qrelu2'] = QReLU(quant_type, num_bits=num_bits, e_bits=e_bits)
self.qlayers['qpool2'] = QMaxPooling2d(quant_type, kernel_size=self.layers['pool2'].kernel_size, stride=self.layers['pool2'].stride, padding=self.layers['pool2'].padding, num_bits=num_bits, e_bits=e_bits)
self.qfc1.freeze(qi=self.qlstm_layers[name].qox)
self.qlayers['qconvbnrelu3'] = QConvBNReLU(quant_type, self.layers['conv3'], self.layers['bn3'], qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
def quantize_inference(self, x):
# first fake-quantize x (to match the LSTM's fake quantization)
x = FakeQuantize.apply(x,self.qlstm_layers['0'].qix)
for name, layer in self.qlstm_layers.items():
if '0' in name:
x,(h,c) = layer.quantize_inference(x)
self.qlayers['qconv4'] = QConv2d(quant_type, self.layers['conv4'], qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qlayers['qrelu4'] = QReLU(quant_type, num_bits=num_bits, e_bits=e_bits)
self.qlayers['qpool4'] = QMaxPooling2d(quant_type, kernel_size=self.layers['pool4'].kernel_size, stride=self.layers['pool4'].stride, padding=self.layers['pool4'].padding, num_bits=num_bits, e_bits=e_bits)
self.qlayers['qconvbnrelu5'] = QConvBNReLU(quant_type, self.layers['conv5'], self.layers['bn5'], qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qlayers['qconv6'] = QConv2d(quant_type, self.layers['conv6'], qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qlayers['qrelu6'] = QReLU(quant_type, num_bits=num_bits, e_bits=e_bits)
self.qlayers['qpool6'] = QMaxPooling2d(quant_type, kernel_size=self.layers['pool6'].kernel_size, stride=self.layers['pool6'].stride, padding=self.layers['pool6'].padding, num_bits=num_bits, e_bits=e_bits)
self.qlayers['qconvbnrelu7'] = QConvBNReLU(quant_type, self.layers['conv7'], self.layers['bn7'], qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qlayers['qlstm1'] = QLSTM(quant_type, self.layers['lstm1'], has_hidden=False, qix=False, qih=True, qic=True, num_bits=num_bits, e_bits=e_bits)
self.qlayers['qfc1'] = QLinear(quant_type, self.layers['fc1'], qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
# not chained to the previous LSTM, so qih/qic must stay True to collect statistics
self.qlayers['qlstm2'] = QLSTM(quant_type, self.layers['lstm2'], has_hidden=False, qix=False, qih=True, qic=True, num_bits=num_bits, e_bits=e_bits)
self.qlayers['qfc2'] = QLinear(quant_type, self.layers['fc2'], qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
def quantize_forward(self, x):
# conv features
for name,layer in self.qlayers.items():
if 'qlstm' in name:
break
else:
x,(h,c) = layer.quantize_inference(x,h,c)
x = layer(x)
x = x.squeeze(2) # b *512 * width
# width must be passed on to the LSTM as the sequence dimension
x = x.permute(2,0,1) # [w, b, c]
x,_ = self.qlayers['qlstm1'](x)
t, n = x.size(0), x.size(1)
x = x.view(t * n, -1)
x = self.qlayers['qfc1'](x)
x = x.view(t,n,-1)
x,_ = self.qlayers['qlstm2'](x)
t, n = x.size(0), x.size(1)
x = x.view(t * n, -1)
x = self.qlayers['qfc2'](x)
x = x.view(t,n,-1)
# the modified QLinear.quantize_inference quantizes its input x, so a dequantize is needed here.
x = self.qfc1.quantize_inference(x)
x = self.qfc1.qo.dequantize_tensor(x)
x = x.view(t, n, -1)
output = F.log_softmax(x, dim=2)
x = F.log_softmax(x,dim=2)
return output
return x
def freeze(self):
last_name = ''
for name,layer in self.qlayers.items():
if last_name == '':
layer.freeze()
else:
if 'lstm' not in last_name:
layer.freeze(self.qlayers[last_name].qo)
else:
layer.freeze(self.qlayers[last_name].qox)
if 'conv' in name or 'fc' in name or 'lstm' in name:
last_name = name
def fakefreeze(self):
for name,layer in self.qlayers.items():
if 'lstm' not in name:
layer.fakefreeze()
def export(self, output_path, simd_factor, pe):
if self.trainer_params.neuron_type == 'QLSTM':
assert(self.trainer_params.input_size % simd_factor == 0)
assert(self.trainer_params.num_units % simd_factor == 0)
assert((simd_factor >= 1 and pe == 1) or (simd_factor == 1 and pe >= 1))
ih_simd = self.trainer_params.input_size / simd_factor
hh_simd = self.trainer_params.num_units / simd_factor
lstm_weight_ih = self.recurrent_layer.hls_lstm_weight_ih_string(ih_simd, pe)
lstm_weight_hh = self.recurrent_layer.hls_lstm_weight_hh_string(hh_simd, pe)
def quantize_inference(self, x):
x= self.qlayers['qconv1'].qi.quantize_tensor(x)
for name,layer in self.qlayers.items():
if 'qlstm' in name:
break
else:
x = layer.quantize_inference(x)
# dequantize => fp32 scale
# x = layer.qix.dequantize_tensor(x)
x = self.qlayers['qconvbnrelu7'].qo.dequantize_tensor(x)
x = x.squeeze(2) # b *512 * width
# width must be passed on to the LSTM as the sequence dimension
x = x.permute(2,0,1) # [w, b, c]
lstm_weight_decl_list = map(list, zip(*lstm_weight_ih))[0] + map(list, zip(*lstm_weight_hh))[0]
lstm_weight_string_list = map(list, zip(*lstm_weight_ih))[1] + map(list, zip(*lstm_weight_hh))[1]
x,_ = self.qlayers['qlstm1'].quantize_inference(x)
t, n = x.size(0), x.size(1)
x = x.view(t * n, -1)
if self.trainer_params.recurrent_bias_enabled:
lstm_bias = self.recurrent_layer.hls_lstm_bias_strings(pe)
lstm_bias_decl_list = map(list, zip(*lstm_bias))[0]
lstm_bias_string_list = map(list, zip(*lstm_bias))[1]
x = self.qlayers['qfc1'].qi.quantize_tensor(x)
x = self.qlayers['qfc1'].quantize_inference(x)
x = self.qlayers['qfc1'].qo.dequantize_tensor(x)
fc_weight_decl, fc_weight_string = self.batch_norm_fc.linear.hls_weight_string(self.reduce_factor)
fc_bias_decl, fc_bias_string = self.batch_norm_fc.linear.hls_bias_string(self.reduce_factor)
x = x.view(t,n,-1)
x,_ = self.qlayers['qlstm2'].quantize_inference(x)
t, n = x.size(0), x.size(1)
x = x.view(t * n, -1)
def define(name, val):
return "#define {} {}\n".format(name, val)
x = self.qlayers['qfc2'].qi.quantize_tensor(x)
x = self.qlayers['qfc2'].quantize_inference(x)
x = self.qlayers['qfc2'].qo.dequantize_tensor(x)
with open(output_path, 'w') as f:
print("Exporting model to {}".format(output_path))
f.write("#pragma once" + '\n')
x = x.view(t,n,-1)
f.write(define("PE", pe))
f.write(define("SIMD_INPUT", ih_simd))
f.write(define("SIMD_RECURRENT", hh_simd))
f.write(define("NUMBER_OF_NEURONS", self.trainer_params.num_units))
f.write(define("NUMBER_OF_NEURONS_TYPEWIDTH", int(math.ceil(math.log(self.trainer_params.num_units, 2.0)) + 2)))
f.write(define("HEIGHT_IN_PIX", self.trainer_params.input_size))
f.write(define("HEIGHT_IN_PIX_TYPEWIDTH", int(math.ceil(math.log(self.trainer_params.input_size, 2.0)) + 2)))
f.write(define("NUMBER_OF_CLASSES", self.trainer_params.num_classes))
f.write(define("NUMBER_OF_CLASSES_TYPEWIDTH", 7+1))
f.write(define("MAX_NUMBER_COLUMNS_TEST_SET", 28*self.trainer_params.word_size))
f.write(define("MAX_NUMBER_COLUMNS_TEST_SET_TYPEWIDTH", 10+1))
f.write(define("SIZE_OF_OUTPUT_BUFFER", 96))
f.write(define("DIRECTIONS", 2 if self.trainer_params.bidirectional else 1))
data_width = 64
input_bit_width = self.trainer_params.recurrent_activation_bit_width if self.trainer_params.quantize_input else 8
f.write(define("PACKEDWIDTH", int(data_width * input_bit_width / 2)))
f.write(define("DATAWIDTH", data_width))
f.write(define("PIXELWIDTH", input_bit_width))
f.write(define("WEIGHTWIDTH", self.trainer_params.recurrent_weight_bit_width))
f.write(define("BIASWIDTH", self.trainer_params.recurrent_bias_bit_width))
f.write(define("FCWEIGHTWIDTH", self.trainer_params.fc_weight_bit_width))
f.write(define("FCBIASWIDTH", self.trainer_params.fc_bias_bit_width))
f.write(define("OUTPUTACTIVATIONHIDDENLAYERWIDTH", self.trainer_params.recurrent_activation_bit_width))
f.write(define("OUTPUTACTIVATIONOUTPUTLAYERWIDTH", 16))
output = F.log_softmax(x, dim=2)
# write lstm weight decl
for decl in lstm_weight_decl_list:
f.write(decl + '\n')
return output
# write lstm bias decl
if self.trainer_params.recurrent_bias_enabled:
for decl in lstm_bias_decl_list:
f.write(decl + '\n')
# write fc weight and bias decl
f.write(fc_weight_decl + '\n')
f.write(fc_bias_decl + '\n')
# write lstm weights
for string in lstm_weight_string_list:
f.write(string + '\n')
# Xavier initialization would keep each neuron's output variance roughly equal, avoiding vanishing/exploding gradients (note: the code below actually uses a normal init)
def weights_init(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
m.weight.data.normal_(0.0, 0.02)
elif classname.find('BatchNorm') != -1:
m.weight.data.normal_(1.0, 0.02)
m.bias.data.fill_(0)
# write lstm bias
if self.trainer_params.recurrent_bias_enabled:
for string in lstm_bias_string_list:
f.write(string + '\n')
def get_crnn(config):
# write fc weights and bias
f.write(fc_weight_string + '\n')
f.write(fc_bias_string + '\n')
model = CRNN(config.MODEL.IMAGE_SIZE.H, 1, config.MODEL.NUM_CLASSES + 1, config.MODEL.NUM_HIDDEN)
model.apply(weights_init)
else:
raise Exception("Export not supported for {}".format(self.trainer_params.neuron_type))
\ No newline at end of file
return model
\ No newline at end of file
......@@ -8,33 +8,26 @@ from torch.autograd import Variable
from function import FakeQuantize
# get the nearest quantized value
# def get_nearest_val(quant_type,x,is_bias=False):
# if quant_type=='INT':
# return x.round_()
# plist = gol.get_value(is_bias)
# # print('get')
# # print(plist)
# # x = x / 64
# shape = x.shape
# xhard = x.view(-1)
# plist = plist.type_as(x)
# # take the nearest power-of-two value as the index
# idx = (xhard.unsqueeze(0) - plist.unsqueeze(1)).abs().min(dim=0)[1]
# xhard = plist[idx].view(shape)
# xout = (xhard - x).detach() + x
# # xout = xout * 64
# return xout
def js_div(p_output, q_output, get_softmax=True):
"""
Function that measures JS divergence between target and output logits:
"""
KLDivLoss = nn.KLDivLoss(reduction='sum')
if get_softmax:
p_output = F.softmax(p_output, dim=-1) # explicit dim: the implicit-dim form is deprecated
q_output = F.softmax(q_output, dim=-1)
log_mean_output = ((p_output + q_output)/2).log()
return (KLDivLoss(log_mean_output, p_output) + KLDivLoss(log_mean_output, q_output))/2
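For reference, with M = (P+Q)/2 this computes the Jensen-Shannon divergence (summed, not normalized, given reduction='sum'):
\mathrm{JS}(P \,\|\, Q) = \tfrac{1}{2}\,\mathrm{KL}(P \,\|\, M) + \tfrac{1}{2}\,\mathrm{KL}(Q \,\|\, M)
Note that `nn.KLDivLoss(input, target)` expects `input` already in log space, which is why `log_mean_output` is the first argument.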
# get the nearest quantized value
def get_nearest_val(quant_type, x, is_bias=False, block_size=1000000):
if quant_type == 'INT':
return x.round_()
plist = gol.get_value(is_bias)
shape = x.shape
# xhard = x.view(-1)
xhard = x.reshape(-1)
xout = torch.zeros_like(xhard)
......@@ -54,10 +47,9 @@ def get_nearest_val(quant_type, x, is_bias=False, block_size=1000000):
plist_block = plist.unsqueeze(1) #.expand(-1, block_size_i)
idx = (xblock.unsqueeze(0) - plist_block).abs().min(dim=0)[1]
# print(xblock.shape)
xhard_block = plist[idx].view(xblock.shape)
xhard_block = plist[idx].reshape(xblock.shape)
xout[start_idx:end_idx] = (xhard_block - xblock).detach() + xblock
# xout = xout.view(shape)
xout = xout.reshape(shape)
return xout
......@@ -87,7 +79,6 @@ def calcScaleZeroPoint(min_val, max_val, qmax):
# quantize the input; both input and output are tensors
def quantize_tensor(quant_type, x, scale, zero_point, qmax, is_bias=False):
# the post-quantization range, determined directly by the bit width
qmin = -qmax
q_x = zero_point + x / scale
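The helpers here implement standard affine quantization: for scale s and zero point z, with q_min = -q_max fixed by the bit width,
q = \mathrm{clamp}\big(\mathrm{round}(z + x/s),\, q_{\min},\, q_{\max}\big), \qquad x \approx s\,(q - z)
where the rounding step is `get_nearest_val` (plain rounding for INT, nearest representable power-of-two value otherwise).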
......@@ -161,14 +152,15 @@ class QParam(nn.Module):
info = 'scale: %.10f ' % self.scale
info += 'zp: %.6f ' % self.zero_point
info += 'min: %.6f ' % self.min
info += 'max: %.6f' % self.max
info += 'max: %.6f ' % self.max
info += 'qmax: %6f ' % self.qmax
return info
# parent class of the concrete quantized layers; qi and qo quantize the input/output respectively
class QModule(nn.Module):
def __init__(self,quant_type, qi=True, qo=True, num_bits=8, e_bits=3):
def __init__(self,quant_type, qi=False, qo=True, num_bits=8, e_bits=3):
super(QModule, self).__init__()
if qi:
self.qi = QParam(quant_type,num_bits, e_bits)
......@@ -182,12 +174,11 @@ class QModule(nn.Module):
def freeze(self):
pass # no-op
def fakefreeze(self):
pass
def quantize_inference(self, x):
raise NotImplementedError('quantize_inference should be implemented.')
def fakefreeze(self):
pass
"""
QModule quantized convolution
......@@ -202,7 +193,7 @@ QModule 量化卷积
class QConv2d(QModule):
def __init__(self, quant_type, conv_module, qi=True, qo=True, num_bits=8, e_bits=3):
def __init__(self, quant_type, conv_module, qi=False, qo=True, num_bits=8, e_bits=3):
super(QConv2d, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.conv_module = conv_module
self.qw = QParam(quant_type, num_bits,e_bits)
......@@ -233,14 +224,16 @@ class QConv2d(QModule):
self.conv_module.weight.data = self.qw.quantize_tensor(self.conv_module.weight.data)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
# handle conv layers without bias; forward and inference may then pass None
if self.conv_module.bias is not None:
self.conv_module.bias.data = quantize_tensor(self.quant_type,
self.conv_module.bias.data, scale=self.qi.scale * self.qw.scale,
zero_point=0.,qmax=self.bias_qmax, is_bias=True)
def fakefreeze(self):
self.conv_module.weight.data = self.qw.dequantize_tensor(self.conv_module.weight.data)
self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,scale=self.qi.scale * self.qw.scale, zero_point=0.)
if self.conv_module.bias is not None:
self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,scale=self.qi.scale*self.qw.scale,zero_point=0.)
def forward(self, x): # forward pass; input tensor x holds floating-point data
if hasattr(self, 'qi'):
......@@ -250,13 +243,8 @@ class QConv2d(QModule):
# update qw before forward so the scale is correct when the weights are quantized
self.qw.update(self.conv_module.weight.data)
# note: this mainly collects each layer's x and weight ranges; the bias is not quantized here
# tmp_wgt = FakeQuantize.apply(self.conv_module.weight, self.qw)
# x = F.conv2d(x, tmp_wgt, self.conv_module.bias,
# stride=self.conv_module.stride,
# padding=self.conv_module.padding, dilation=self.conv_module.dilation,
# groups=self.conv_module.groups)
x = F.conv2d(x, FakeQuantize.apply(self.conv_module.weight, self.qw), self.conv_module.bias,
tmp_wgt = FakeQuantize.apply(self.conv_module.weight, self.qw)
x = F.conv2d(x, tmp_wgt, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
......@@ -273,16 +261,14 @@ class QConv2d(QModule):
x = self.conv_module(x)
x = self.M * x
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
return x
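The `M` rescaling follows from writing each tensor in affine form. A sketch: with x = s_i(q_x - z_i), weights stored in `freeze` as q_w - z_w with scale s_w, and y = s_o(q_y - z_o),
q_y = \frac{s_i s_w}{s_o}\,\mathrm{conv}(q_x - z_i,\; q_w - z_w) + z_o = M\cdot\mathrm{conv}(\cdot) + z_o
which is exactly the sequence above: subtract `qi.zero_point`, run the conv on integer-valued tensors, scale by M = qi.scale * qw.scale / qo.scale, snap to the nearest representable value, and add `qo.zero_point`.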
class QLinear(QModule):
def __init__(self, quant_type, fc_module, qi=True, qo=True, num_bits=8, e_bits=3):
def __init__(self, quant_type, fc_module, qi=False, qo=True, num_bits=8, e_bits=3):
super(QLinear, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.fc_module = fc_module
self.qw = QParam(quant_type, num_bits, e_bits)
......@@ -309,15 +295,15 @@ class QLinear(QModule):
self.fc_module.weight.data = self.qw.quantize_tensor(self.fc_module.weight.data)
self.fc_module.weight.data = self.fc_module.weight.data - self.qw.zero_point
if self.fc_module.bias is not None:
self.fc_module.bias.data = quantize_tensor(self.quant_type,
self.fc_module.bias.data, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax, is_bias=True)
def fakefreeze(self):
self.fc_module.weight.data = self.qw.dequantize_tensor(self.fc_module.weight.data)
self.fc_module.bias.data = dequantize_tensor(self.fc_module.bias.data, scale=self.qi.scale * self.qw.scale, zero_point=0.)
if self.fc_module.bias is not None:
self.fc_module.bias.data = dequantize_tensor(self.fc_module.bias.data,scale=self.qi.scale*self.qw.scale,zero_point=0.)
def forward(self, x):
if hasattr(self, 'qi'):
......@@ -325,9 +311,8 @@ class QLinear(QModule):
x = FakeQuantize.apply(x, self.qi)
self.qw.update(self.fc_module.weight.data)
# tmp_wgt = FakeQuantize.apply(self.fc_module.weight, self.qw)
# x = F.linear(x, tmp_wgt, self.fc_module.bias)
x = F.linear(x, FakeQuantize.apply(self.fc_module.weight, self.qw), self.fc_module.bias)
tmp_wgt = FakeQuantize.apply(self.fc_module.weight, self.qw)
x = F.linear(x, tmp_wgt, self.fc_module.bias)
if hasattr(self, 'qo'):
self.qo.update(x)
......@@ -335,26 +320,28 @@ class QLinear(QModule):
return x
# still freeze and then quantize_inference; only, for the LSTM's fake-quantized output, apply qi.quantize_tensor first before feeding it into QLinear
def quantize_inference(self, x):
# bridges the fp32-scale x coming from the LSTM output
x = self.qi.quantize_tensor(x)
x = x - self.qi.zero_point
x = self.fc_module(x)
x = self.M * x
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
return x
# if hasattr(self, 'qi'):
# x = FakeQuantize.apply(x, self.qi)
# tmp_wgt = FakeQuantize.apply(self.fc_module.weight, self.qw)
# x = F.linear(x, tmp_wgt, self.fc_module.bias)
# if hasattr(self, 'qo'):
# x = FakeQuantize.apply(x, self.qo)
# return x
class QReLU(QModule):
def __init__(self,quant_type, qi=False, qo=True, num_bits=8, e_bits=3):
def __init__(self,quant_type, qi=False, qo=False, num_bits=8, e_bits=3):
super(QReLU, self).__init__(quant_type, qi, qo, num_bits, e_bits)
def freeze(self, qi=None):
......@@ -385,9 +372,10 @@ class QReLU(QModule):
return x
class QMaxPooling2d(QModule):
def __init__(self, quant_type, kernel_size=3, stride=1, padding=0, qi=False, qo=True, num_bits=8,e_bits=3):
def __init__(self, quant_type, kernel_size=3, stride=1, padding=0, qi=False, qo=False, num_bits=8,e_bits=3):
super(QMaxPooling2d, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.kernel_size = kernel_size
self.stride = stride
......@@ -413,33 +401,13 @@ class QMaxPooling2d(QModule):
def quantize_inference(self, x):
return F.max_pool2d(x, self.kernel_size, self.stride, self.padding)
class QConvBNReLU(QModule):
class QAdaptiveAvgPool2d(QModule):
def __init__(self, quant_type, conv_module, bn_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QConvBNReLU, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.conv_module = conv_module
self.bn_module = bn_module
self.qw = QParam(quant_type, num_bits,e_bits)
def __init__(self, quant_type, output_size, qi=False, qo=True, num_bits=8,e_bits=3):
super(QAdaptiveAvgPool2d, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.output_size = output_size
self.register_buffer('M', torch.tensor([], requires_grad=False)) # register M as a buffer
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
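This is the usual inference-time conv-BN fold: per output channel, with BN statistics \mu, \sigma^2 and affine parameters \gamma, \beta,
W' = \frac{\gamma}{\sqrt{\sigma^2+\epsilon}}\,W, \qquad b' = \frac{\gamma}{\sqrt{\sigma^2+\epsilon}}\,(b - \mu) + \beta
(in the non-affine branch \gamma = 1 and \beta is absent), so the folded conv reproduces conv followed by BN.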
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
......@@ -456,96 +424,33 @@ class QConvBNReLU(QModule):
if qo is not None:
self.qo = qo
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
if self.conv_module.bias is None:
self.conv_module.bias = nn.Parameter(quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True))
else:
self.conv_module.bias.data = quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True)
def fakefreeze(self):
self.conv_module.weight.data = self.qw.dequantize_tensor(self.conv_module.weight.data)
self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,scale=self.qi.scale * self.qw.scale, zero_point=0.)
self.M.data = (self.qi.scale / self.qo.scale).data
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
self.qw.update(weight.data)
x = F.conv2d(x, FakeQuantize.apply(weight, self.qw), bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
x = F.relu(x)
x = F.adaptive_avg_pool2d(x, self.output_size)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
x = x - self.qi.zero_point
x = self.conv_module(x)
x = F.adaptive_avg_pool2d(x, self.output_size)
x = self.M * x
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
x.clamp_(min=0)
x = x+self.qo.zero_point
return x
class QConvBNReLU(QModule):
class QConvBN(QModule):
def __init__(self, quant_type, conv_module, bn_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QConvBN, self).__init__(quant_type, qi, qo, num_bits, e_bits)
def __init__(self, quant_type, conv_module, bn_module, qi=False, qo=True, num_bits=8, e_bits=3):
super(QConvBNReLU, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.conv_module = conv_module
self.bn_module = bn_module
self.qw = QParam(quant_type, num_bits,e_bits)
......@@ -594,18 +499,19 @@ class QConvBN(QModule):
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
if self.conv_module.bias is None:
self.conv_module.bias = nn.Parameter(quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True))
else:
if self.conv_module.bias is not None:
self.conv_module.bias.data = quantize_tensor(self.quant_type,
bias.data, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True)
else:
bias = quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True)
self.conv_module.bias = torch.nn.Parameter(bias)
def fakefreeze(self):
self.conv_module.weight.data = self.qw.dequantize_tensor(self.conv_module.weight.data)
self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,scale=self.qi.scale * self.qw.scale, zero_point=0.)
self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,scale=self.qi.scale*self.qw.scale,zero_point=0.)
def forward(self, x):
......@@ -646,7 +552,7 @@ class QConvBN(QModule):
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
# x = F.relu(x)
x = F.relu(x)
if hasattr(self, 'qo'):
self.qo.update(x)
......@@ -658,68 +564,15 @@ class QConvBN(QModule):
x = x - self.qi.zero_point
x = self.conv_module(x)
x = self.M * x
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
# x.clamp_(min=0)
return x
# TODO: this probably needs a qo
class QAdaptiveAvgPool2d(QModule):
def __init__(self, quant_type, qi=False, qo=True, num_bits=8, e_bits=3):
super(QAdaptiveAvgPool2d, self).__init__(quant_type,qi,qo,num_bits,e_bits)
self.register_buffer('M', torch.tensor([], requires_grad=False)) # register M as a buffer
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
            raise ValueError('qi does not exist, it should be provided.')
if qi is not None:
self.qi = qi
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
            raise ValueError('qo does not exist, it should be provided.')
if qo is not None:
self.qo = qo
self.M.data = (self.qi.scale / self.qo.scale).data
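        # x_float = qi.scale * (x - qi.zero_point); dividing by qo.scale re-expresses the
        # pooled value on qo's integer grid, so the single multiplier M = qi.scale / qo.scale
        # is all that quantize_inference below needs.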
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
            x = FakeQuantize.apply(x, self.qi)  # as with ReLU: update qi's scale first, then represent x in PoT form (usually the previous layer's qo is True, so x is already PoT-represented)
        x = F.adaptive_avg_pool2d(x, (1, 1))  # quantizing both the input and the output is what makes this layer quantized
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
        x = F.adaptive_avg_pool2d(x, (1, 1))  # quantizing both the input and the output is what makes this layer quantized
x = self.M * x
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
x.clamp_(min=0)
return x
class QConvBNReLU6(QModule):
    def __init__(self, quant_type, conv_module, bn_module, qi=False, qo=True, num_bits=8, e_bits=3):
        super(QConvBNReLU6, self).__init__(quant_type, qi, qo, num_bits, e_bits)
        self.conv_module = conv_module
        self.bn_module = bn_module
        self.qw = QParam(quant_type, num_bits, e_bits)
......@@ -768,16 +621,19 @@ class QConvBNReLU6(QModule):
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
if self.conv_module.bias is not None:
self.conv_module.bias.data = quantize_tensor(self.quant_type,
bias.data, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True)
else:
bias = quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True)
self.conv_module.bias = torch.nn.Parameter(bias)
def fakefreeze(self):
self.conv_module.weight.data = self.qw.dequantize_tensor(self.conv_module.weight.data)
        self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data, scale=self.qi.scale * self.qw.scale, zero_point=0.)
def forward(self, x):
......@@ -818,8 +674,6 @@ class QConvBNReLU6(QModule):
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
x = F.relu6(x)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
......@@ -827,135 +681,103 @@ class QConvBNReLU6(QModule):
return x
def quantize_inference(self, x):
a = torch.tensor(6)
a = self.qo.quantize_tensor(a)
x = x - self.qi.zero_point
x = self.conv_module(x)
x = self.M * x
# if self.quant_type is not 'POT':
# x = get_nearest_val(self.quant_type,x)
        x = get_nearest_val(self.quant_type, x)
        x = x + self.qo.zero_point  # now in qo's quantized domain
        x.clamp_(min=0, max=a.item())
return x
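# e.g. assuming qo.scale = 0.05 and qo.zero_point = 10 (illustrative values), the
# quantized six is a = round(6 / 0.05) + 10 = 130, so the activations above are
# clamped to [0, 130] on qo's grid.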
# Parent class for concrete quantized layers; qi and qo quantize the input and
# output respectively. This variant handles results coming from several layers,
# or a qi that is passed in as an array.
class QModule_array(nn.Module):
    def __init__(self, quant_type, len, qi_array=False, qo=True, num_bits=8, e_bits=3):
        super(QModule_array, self).__init__()
        if qi_array:
            for i in range(len):
                self.add_module('qi%d' % i, QParam(quant_type, num_bits, e_bits))
        if qo:
            self.qo = QParam(quant_type, num_bits, e_bits)
        self.quant_type = quant_type
        self.num_bits = num_bits
        self.e_bits = e_bits
        self.bias_qmax = bias_qmax(quant_type)
        self.len = len
def freeze(self):
pass
def fakefreeze(self):
        pass
def quantize_inference(self, x):
raise NotImplementedError('quantize_inference should be implemented.')
class QConcat(QModule_array):
    def __init__(self, quant_type, len, qi_array=False, qo=True, num_bits=8, e_bits=3):
        super(QConcat, self).__init__(quant_type, len, qi_array, qo, num_bits, e_bits)
        for i in range(len):
            self.register_buffer('M%d' % i, torch.tensor([], requires_grad=False))

    def freeze(self, qi_array=None, qo=None):
        if qi_array is None:
            raise ValueError('qi_array should be provided')
        elif len(qi_array) != self.len:
            raise ValueError('the length of qi_array does not match')
        if hasattr(self, 'qo') and qo is not None:
            raise ValueError('qo has been provided in init function.')
        if not hasattr(self, 'qo') and qo is None:
            raise ValueError('qo does not exist, it should be provided.')

        for i in range(self.len):
            self.add_module('qi%d' % i, qi_array[i])
        if qo is not None:
            self.qo = qo
        for i in range(self.len):
            getattr(self, 'M%d' % i).data = (getattr(self, 'qi%d' % i).scale / self.qo.scale).data

    def forward(self, x_array):
        outs = []
        for i in range(self.len):
            x = x_array[i]
            if hasattr(self, 'qi%d' % i):
                qi = getattr(self, 'qi%d' % i)
                qi.update(x)
                x = FakeQuantize.apply(x, qi)
            outs.append(x)
        out = torch.cat(outs, 1)
        if hasattr(self, 'qo'):
            self.qo.update(out)
            out = FakeQuantize.apply(out, self.qo)
        return out

    def quantize_inference(self, x_array):
        outs = []
        for i in range(self.len):
            qi = getattr(self, 'qi%d' % i)
            x = x_array[i] - qi.zero_point
            x = getattr(self, 'M%d' % i) * x
            outs.append(x)
        out = torch.concat(outs, 1)
        out = get_nearest_val(self.quant_type, out)
        out = out + self.qo.zero_point
        return out
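# Worked example for the rescaling above (illustrative values): with two branches
# where qi0.scale = 0.02, qi1.scale = 0.04 and qo.scale = 0.05, M0 = 0.4 and M1 = 0.8;
# each branch is shifted by its zero_point, mapped onto qo's grid, concatenated,
# rounded by get_nearest_val, and qo.zero_point is added once at the end.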
class QModule_rnnbase(nn.Module):
    def __init__(self, quant_type, qix=False, qih=False, qic=False, qox=True, qoh=True, qoc=True, num_bits=8, e_bits=3):
        super(QModule_rnnbase, self).__init__()
        if qix:
            self.qix = QParam(quant_type, num_bits, e_bits)
        if qih:
            self.qih = QParam(quant_type, num_bits, e_bits)
        if qic:
            self.qic = QParam(quant_type, num_bits, e_bits)
        if qox:
            self.qox = QParam(quant_type, num_bits, e_bits)
        if qoh:
            self.qoh = QParam(quant_type, num_bits, e_bits)
        if qoc:
            self.qoc = QParam(quant_type, num_bits, e_bits)
self.quant_type = quant_type
self.num_bits = num_bits
self.e_bits = e_bits
......@@ -964,59 +786,52 @@ class QModule_3(nn.Module):
def freeze(self):
        pass  # empty statement
def fakefreeze(self):
pass
def quantize_inference(self, x):
raise NotImplementedError('quantize_inference should be implemented.')
# has_hidden indicates whether this layer can receive a hidden input with non-zero values.
# For the first layer, the input of the first batch may be zero while later batches are
# non-zero; in that case has_hidden must still be set to True, and we additionally check
# whether hidden is None to avoid ending up with a scale of 0.
class QLSTM(QModule_rnnbase):
    def __init__(self, quant_type, lstm_module, has_hidden, qix=False, qih=False, qic=False, qox=True, qoh=True, qoc=True, num_bits=8, e_bits=3):
        super(QLSTM, self).__init__(quant_type, qix, qih, qic, qox, qoh, qoc, num_bits, e_bits)
self.lstm_module = lstm_module
        self.qwih = QParam(quant_type, num_bits, e_bits)
        self.qwhh = QParam(quant_type, num_bits, e_bits)
        self.qwihr = QParam(quant_type, num_bits, e_bits)
        self.qwhhr = QParam(quant_type, num_bits, e_bits)
        # whether a hidden state is fed into this layer
        self.has_hidden = has_hidden
def freeze(self, qix=None, qih=None, qic=None, qox=None, qoh=None, qoc=None):
        if hasattr(self, 'qix') and qix is not None:
            raise ValueError('qix has been provided in init function.')
        if not hasattr(self, 'qix') and qix is None:
            raise ValueError('qix does not exist, it should be provided.')
        if self.has_hidden:
            if hasattr(self, 'qih') and qih is not None:
                raise ValueError('qih has been provided in init function.')
            if not hasattr(self, 'qih') and qih is None:
                raise ValueError('qih does not exist, it should be provided.')
            if hasattr(self, 'qic') and qic is not None:
                raise ValueError('qic has been provided in init function.')
            if not hasattr(self, 'qic') and qic is None:
                raise ValueError('qic does not exist, it should be provided.')
        if hasattr(self, 'qox') and qox is not None:
            raise ValueError('qox has been provided in init function.')
        if not hasattr(self, 'qox') and qox is None:
            raise ValueError('qox does not exist, it should be provided.')
        if hasattr(self, 'qoh') and qoh is not None:
            raise ValueError('qoh has been provided in init function.')
        if not hasattr(self, 'qoh') and qoh is None:
            raise ValueError('qoh does not exist, it should be provided.')
        if hasattr(self, 'qoc') and qoc is not None:
            raise ValueError('qoc has been provided in init function.')
        if not hasattr(self, 'qoc') and qoc is None:
......@@ -1024,376 +839,237 @@ class QLSTM(QModule_3):
        if qix is not None:
            self.qix = qix
        if self.has_hidden:
            if qih is not None:
                self.qih = qih
            if qic is not None:
                self.qic = qic
        if qox is not None:
            self.qox = qox
        if qoh is not None:
            self.qoh = qoh
        if qoc is not None:
            self.qoc = qoc
        # Two things are involved here: 1. the sequence output vs. the hidden output,
        # 2. bidirectionality. The two outputs can be handled separately; the bidirectional
        # case is trickier, since after quantization it even involves how the SUM and
        # CONCAT merge modes are integrated.
        # self.Mi.data = (self.qwih.scale * self.qi.scale / self.qo.scale).data
        # self.Mh.data = (self.qwhh.scale * self.qi.scale / self.qo.scale).data
        # fake-quantize the weights and biases
        # forward direction
        self.lstm_module.weight_ih_l0.data = FakeQuantize.apply(self.lstm_module.weight_ih_l0.data, self.qwih)
        self.lstm_module.weight_hh_l0.data = FakeQuantize.apply(self.lstm_module.weight_hh_l0.data, self.qwhh)
        self.lstm_module.bias_ih_l0.data = quantize_tensor(self.quant_type,
                                                           self.lstm_module.bias_ih_l0.data,
                                                           scale=self.qix.scale * self.qwih.scale,
                                                           zero_point=0., qmax=self.bias_qmax, is_bias=True)
        self.lstm_module.bias_ih_l0.data = dequantize_tensor(self.lstm_module.bias_ih_l0.data,
                                                             scale=self.qix.scale * self.qwih.scale, zero_point=0.)
        # reverse direction
        self.lstm_module.weight_ih_l0_reverse.data = FakeQuantize.apply(self.lstm_module.weight_ih_l0_reverse.data, self.qwihr)
        self.lstm_module.weight_hh_l0_reverse.data = FakeQuantize.apply(self.lstm_module.weight_hh_l0_reverse.data, self.qwhhr)
        self.lstm_module.bias_ih_l0_reverse.data = quantize_tensor(self.quant_type,
                                                                   self.lstm_module.bias_ih_l0_reverse.data,
                                                                   scale=self.qix.scale * self.qwihr.scale,
                                                                   zero_point=0., qmax=self.bias_qmax, is_bias=True)
        self.lstm_module.bias_ih_l0_reverse.data = dequantize_tensor(self.lstm_module.bias_ih_l0_reverse.data,
                                                                     scale=self.qix.scale * self.qwihr.scale, zero_point=0.)
        # the first LSTM layer has no qih, so bias_hh is left unquantized there
        if self.has_hidden:
            # forward direction
            self.lstm_module.bias_hh_l0.data = quantize_tensor(self.quant_type,
                                                               self.lstm_module.bias_hh_l0.data,
                                                               scale=self.qih.scale * self.qwhh.scale,
                                                               zero_point=0., qmax=self.bias_qmax, is_bias=True)
            self.lstm_module.bias_hh_l0.data = dequantize_tensor(self.lstm_module.bias_hh_l0.data,
                                                                 scale=self.qih.scale * self.qwhh.scale, zero_point=0.)
            # reverse direction
            self.lstm_module.bias_hh_l0_reverse.data = quantize_tensor(self.quant_type,
                                                                       self.lstm_module.bias_hh_l0_reverse.data,
                                                                       scale=self.qih.scale * self.qwhhr.scale,
                                                                       zero_point=0., qmax=self.bias_qmax, is_bias=True)
            self.lstm_module.bias_hh_l0_reverse.data = dequantize_tensor(self.lstm_module.bias_hh_l0_reverse.data,
                                                                         scale=self.qih.scale * self.qwhhr.scale, zero_point=0.)
    def forward(self, x, hidden=None):
        # if self.has_hidden and hidden is None:
        #     raise ValueError("an LSTM layer with has_hidden must receive a hidden state")
        if not self.has_hidden and hidden is not None:
            raise ValueError("an LSTM layer without has_hidden cannot receive a hidden state")
        if hasattr(self, 'qix'):
            self.qix.update(x)
            x = FakeQuantize.apply(x, self.qix)
        if self.has_hidden and hidden is not None:
            h, c = hidden
            if hasattr(self, 'qih'):
                self.qih.update(h)
                h = FakeQuantize.apply(h, self.qih)
            if hasattr(self, 'qic'):
                self.qic.update(c)
                c = FakeQuantize.apply(c, self.qic)
        self.qwih.update(self.lstm_module.weight_ih_l0.data)
        self.qwhh.update(self.lstm_module.weight_hh_l0.data)
        self.qwihr.update(self.lstm_module.weight_ih_l0_reverse.data)
        self.qwhhr.update(self.lstm_module.weight_hh_l0_reverse.data)

        # Unlike Conv etc., this layer's implementation would require flattening the
        # weights if _VF.lstm were called directly, so to keep the call logic simple
        # we build a fresh nn.LSTM layer and call that instead.
        tmplayer = nn.LSTM(input_size=self.lstm_module.input_size,
                           hidden_size=self.lstm_module.hidden_size,
                           num_layers=1,
                           batch_first=False,
                           bidirectional=self.lstm_module.bidirectional,
                           bias=self.lstm_module.bias,
                           dropout=self.lstm_module.dropout)
        # forward direction
        tmplayer.weight_ih_l0.data = FakeQuantize.apply(self.lstm_module.weight_ih_l0.data, self.qwih)
        tmplayer.weight_hh_l0.data = FakeQuantize.apply(self.lstm_module.weight_hh_l0.data, self.qwhh)
        tmplayer.bias_ih_l0.data = self.lstm_module.bias_ih_l0.data
        tmplayer.bias_hh_l0.data = self.lstm_module.bias_hh_l0.data
        # reverse direction
        tmplayer.weight_ih_l0_reverse.data = FakeQuantize.apply(self.lstm_module.weight_ih_l0_reverse.data, self.qwihr)
        tmplayer.weight_hh_l0_reverse.data = FakeQuantize.apply(self.lstm_module.weight_hh_l0_reverse.data, self.qwhhr)
        tmplayer.bias_ih_l0_reverse.data = self.lstm_module.bias_ih_l0_reverse.data
        tmplayer.bias_hh_l0_reverse.data = self.lstm_module.bias_hh_l0_reverse.data
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        tmplayer.to(device)
        if self.has_hidden and hidden is not None:
            x, (h, c) = tmplayer(x, (h, c))
        else:
            x, (h, c) = tmplayer(x)
        if hasattr(self, 'qox'):
            self.qox.update(x)
            x = FakeQuantize.apply(x, self.qox)
        if hasattr(self, 'qoh'):
            self.qoh.update(h)
            h = FakeQuantize.apply(h, self.qoh)
        if hasattr(self, 'qoc'):
            self.qoc.update(c)
            c = FakeQuantize.apply(c, self.qoc)
        hidden = (h, c)
        return x, hidden
    def quantize_inference(self, x, hidden=None):
        # freeze applied fake quantization, so we can compute directly here
        # without any further scale conversion
        # if self.has_hidden:
        #     x, hidden = self.lstm_module(x, hidden)
        # else:
        #     x, hidden = self.lstm_module(x)
        # h, c = hidden
        # if hasattr(self, 'qox'):
        #     x = FakeQuantize.apply(x, self.qox)
        # if hasattr(self, 'qoh'):
        #     h = FakeQuantize.apply(h, self.qoh)
        # if hasattr(self, 'qoc'):
        #     c = FakeQuantize.apply(c, self.qoc)
        # hidden = (h, c)
        # return x, hidden
        if not self.has_hidden and hidden is not None:
            raise ValueError("an LSTM layer without has_hidden cannot receive a hidden state")
        if hasattr(self, 'qix'):
            x = FakeQuantize.apply(x, self.qix)
        if self.has_hidden and hidden is not None:
            h, c = hidden
            if hasattr(self, 'qih'):
                h = FakeQuantize.apply(h, self.qih)
            if hasattr(self, 'qic'):
                c = FakeQuantize.apply(c, self.qic)
        # Unlike Conv etc., calling _VF.lstm directly would require flattening the
        # weights, so we again build a fresh nn.LSTM to keep the call logic simple.
        tmplayer = nn.LSTM(input_size=self.lstm_module.input_size,
                           hidden_size=self.lstm_module.hidden_size,
                           num_layers=1,
                           batch_first=False,
                           bidirectional=self.lstm_module.bidirectional,
                           bias=self.lstm_module.bias,
                           dropout=self.lstm_module.dropout)
        # forward direction
        tmplayer.weight_ih_l0.data = FakeQuantize.apply(self.lstm_module.weight_ih_l0.data, self.qwih)
        tmplayer.weight_hh_l0.data = FakeQuantize.apply(self.lstm_module.weight_hh_l0.data, self.qwhh)
        tmplayer.bias_ih_l0.data = self.lstm_module.bias_ih_l0.data
        tmplayer.bias_hh_l0.data = self.lstm_module.bias_hh_l0.data
        # reverse direction
        tmplayer.weight_ih_l0_reverse.data = FakeQuantize.apply(self.lstm_module.weight_ih_l0_reverse.data, self.qwihr)
        tmplayer.weight_hh_l0_reverse.data = FakeQuantize.apply(self.lstm_module.weight_hh_l0_reverse.data, self.qwhhr)
        tmplayer.bias_ih_l0_reverse.data = self.lstm_module.bias_ih_l0_reverse.data
        tmplayer.bias_hh_l0_reverse.data = self.lstm_module.bias_hh_l0_reverse.data
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        tmplayer.to(device)
        if self.has_hidden and hidden is not None:
            x, (h, c) = tmplayer(x, (h, c))
        else:
            x, (h, c) = tmplayer(x)
        if hasattr(self, 'qox'):
            x = FakeQuantize.apply(x, self.qox)
        if hasattr(self, 'qoh'):
            h = FakeQuantize.apply(h, self.qoh)
        if hasattr(self, 'qoc'):
            c = FakeQuantize.apply(c, self.qoc)
        hidden = (h, c)
        return x, hidden
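# A minimal usage sketch (hypothetical sizes and calibration flow, not part of the original file):
#   lstm = nn.LSTM(input_size=512, hidden_size=256, bidirectional=True)
#   qlstm = QLSTM('INT', lstm, has_hidden=False)   # first layer: receives no hidden state
#   out, hidden = qlstm(x)                         # calibration forward pass (fake quantization)
#   qlstm.freeze(qix=prev_layer.qo)                # first layer: qih/qic are not required
#   out, hidden = qlstm.quantize_inference(x)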
# new modules for full-precision model - fold bn
# the inference path should be adapted accordingly as well
class ConvBNReLU(nn.Module):
def __init__(self,conv_module, bn_module):
super(ConvBNReLU, self).__init__()
self.conv_module = conv_module
self.bn_module = bn_module
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
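    # The fold above uses BN(y) = gamma * (y - mean) / std + beta with y = W*x + b, which
    # is an equivalent convolution with W' = W * (gamma / std) and
    # b' = (gamma / std) * (b - mean) + beta; the affine=False branch is the special
    # case gamma = 1, beta = 0.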
def freeze(self):
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = weight.data
if self.conv_module.bias is None:
self.conv_module.bias = nn.Parameter(bias)
else:
self.conv_module.bias.data = bias
def fakefreeze(self):
pass
def forward(self, x):
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
x = F.conv2d(x, weight, bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
x = F.relu(x)
return x
def quantize_inference(self, x):
x = self.conv_module(x)
x.clamp_(min=0)
return x
class ConvBN(nn.Module):
def __init__(self,conv_module, bn_module):
super(ConvBN, self).__init__()
self.conv_module = conv_module
self.bn_module = bn_module
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
def freeze(self):
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = weight.data
if self.conv_module.bias is None:
self.conv_module.bias = nn.Parameter(bias)
else:
self.conv_module.bias.data = bias
    def fakefreeze(self):
        pass

    def forward(self, x):
        if self.training:
            y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
                         stride=self.conv_module.stride,
                         padding=self.conv_module.padding,
                         dilation=self.conv_module.dilation,
                         groups=self.conv_module.groups)
            y = y.permute(1, 0, 2, 3)  # NCHW -> CNHW
            y = y.contiguous().view(self.conv_module.out_channels, -1)  # CNHW -> C,NHW
            mean = y.mean(1).detach()
            var = y.var(1).detach()
            self.bn_module.running_mean = \
                (1 - self.bn_module.momentum) * self.bn_module.running_mean + \
                self.bn_module.momentum * mean
            self.bn_module.running_var = \
                (1 - self.bn_module.momentum) * self.bn_module.running_var + \
                self.bn_module.momentum * var
        else:
            mean = Variable(self.bn_module.running_mean)
            var = Variable(self.bn_module.running_var)
        std = torch.sqrt(var + self.bn_module.eps)
        weight, bias = self.fold_bn(mean, std)
        x = F.conv2d(x, weight, bias,
                     stride=self.conv_module.stride,
                     padding=self.conv_module.padding, dilation=self.conv_module.dilation,
                     groups=self.conv_module.groups)
        return x

    def quantize_inference(self, x):
        x = self.conv_module(x)
        return x
class ConvBNReLU6(nn.Module):
    def __init__(self, conv_module, bn_module):
        super(ConvBNReLU6, self).__init__()
        self.conv_module = conv_module
        self.bn_module = bn_module

    def fold_bn(self, mean, std):
        if self.bn_module.affine:
            gamma_ = self.bn_module.weight / std
            weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
            if self.conv_module.bias is not None:
                bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
            else:
                bias = self.bn_module.bias - gamma_ * mean
        else:
            gamma_ = 1 / std
            weight = self.conv_module.weight * gamma_
            if self.conv_module.bias is not None:
                bias = gamma_ * self.conv_module.bias - gamma_ * mean
            else:
                bias = -gamma_ * mean
        return weight, bias

    def freeze(self):
        std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
        weight, bias = self.fold_bn(self.bn_module.running_mean, std)
        self.conv_module.weight.data = weight.data
        if self.conv_module.bias is None:
            self.conv_module.bias = nn.Parameter(bias)
        else:
            self.conv_module.bias.data = bias

    def fakefreeze(self):
        pass

    def forward(self, x):
        if self.training:
            y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
                         stride=self.conv_module.stride,
                         padding=self.conv_module.padding,
                         dilation=self.conv_module.dilation,
                         groups=self.conv_module.groups)
            y = y.permute(1, 0, 2, 3)  # NCHW -> CNHW
            y = y.contiguous().view(self.conv_module.out_channels, -1)  # CNHW -> C,NHW
            mean = y.mean(1).detach()
            var = y.var(1).detach()
            self.bn_module.running_mean = \
                (1 - self.bn_module.momentum) * self.bn_module.running_mean + \
                self.bn_module.momentum * mean
            self.bn_module.running_var = \
                (1 - self.bn_module.momentum) * self.bn_module.running_var + \
                self.bn_module.momentum * var
        else:
            mean = Variable(self.bn_module.running_mean)
            var = Variable(self.bn_module.running_var)
        std = torch.sqrt(var + self.bn_module.eps)
        weight, bias = self.fold_bn(mean, std)
        x = F.conv2d(x, weight, bias,
                     stride=self.conv_module.stride,
                     padding=self.conv_module.padding, dilation=self.conv_module.dilation,
                     groups=self.conv_module.groups)
        x = F.relu6(x)
        return x

    def quantize_inference(self, x):
        x = self.conv_module(x)
        x.clamp_(min=0, max=6)
        return x

class QEmbedding(QModule):
    def __init__(self, quant_type, embedding_module, qi=False, qo=True, num_bits=8, e_bits=3):
        super(QEmbedding, self).__init__(quant_type, qi, qo, num_bits, e_bits)
        self.embedding_module = embedding_module
        self.qw = QParam(quant_type, num_bits, e_bits)
        self.register_buffer('M', torch.tensor([], requires_grad=False))

    def freeze(self, qo=None):
        # the input is an index tensor and is not quantized
        if hasattr(self, 'qo') and qo is not None:
            raise ValueError('qo has been provided in init function.')
        if not hasattr(self, 'qo') and qo is None:
            raise ValueError('qo does not exist, it should be provided.')
        if qo is not None:
            self.qo = qo
        self.M.data = (self.qw.scale / self.qo.scale).data
        self.embedding_module.weight.data = self.qw.quantize_tensor(self.embedding_module.weight.data)

    def forward(self, x):
        self.qw.update(self.embedding_module.weight.data)
        tmp_wgt = FakeQuantize.apply(self.embedding_module.weight, self.qw)
        x = F.embedding(x, tmp_wgt)
        if hasattr(self, 'qo'):
            self.qo.update(x)
            x = FakeQuantize.apply(x, self.qo)
        return x

    def quantize_inference(self, x):
        # true-quantized version:
        # x = self.embedding_module(x)
        # x = self.M * x
        # x = get_nearest_val(self.quant_type, x)
        # return x
        tmp_wgt = FakeQuantize.apply(self.embedding_module.weight, self.qw)
        x = F.embedding(x, tmp_wgt)
        x = FakeQuantize.apply(x, self.qo)
        return x
\ No newline at end of file
CRNN(
11.77 M, 100.000% Params, 1.26 GMac, 100.000% MACs,
(layers): ModuleDict(
11.77 M, 100.000% Params, 1.26 GMac, 100.000% MACs,
(conv1): Conv2d(640, 0.005% Params, 3.28 MMac, 0.260% MACs, 1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(relu1): ReLU(0, 0.000% Params, 327.68 KMac, 0.026% MACs, )
(pool1): MaxPool2d(0, 0.000% Params, 327.68 KMac, 0.026% MACs, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv2): Conv2d(73.86 k, 0.628% Params, 94.54 MMac, 7.494% MACs, 64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(relu2): ReLU(0, 0.000% Params, 163.84 KMac, 0.013% MACs, )
(pool2): MaxPool2d(0, 0.000% Params, 163.84 KMac, 0.013% MACs, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv3): Conv2d(295.17 k, 2.508% Params, 94.45 MMac, 7.487% MACs, 128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn3): BatchNorm2d(512, 0.004% Params, 163.84 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu3): ReLU(0, 0.000% Params, 81.92 KMac, 0.006% MACs, )
(conv4): Conv2d(590.08 k, 5.014% Params, 188.83 MMac, 14.968% MACs, 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(relu4): ReLU(0, 0.000% Params, 81.92 KMac, 0.006% MACs, )
(pool4): MaxPool2d(0, 0.000% Params, 81.92 KMac, 0.006% MACs, kernel_size=(2, 2), stride=(2, 1), padding=(0, 1), dilation=1, ceil_mode=False)
(conv5): Conv2d(1.18 M, 10.029% Params, 193.55 MMac, 15.342% MACs, 256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn5): BatchNorm2d(1.02 k, 0.009% Params, 167.94 KMac, 0.013% MACs, 512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu5): ReLU(0, 0.000% Params, 83.97 KMac, 0.007% MACs, )
(conv6): Conv2d(2.36 M, 20.053% Params, 387.01 MMac, 30.677% MACs, 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(relu6): ReLU(0, 0.000% Params, 83.97 KMac, 0.007% MACs, )
(pool6): MaxPool2d(0, 0.000% Params, 83.97 KMac, 0.007% MACs, kernel_size=(2, 2), stride=(2, 1), padding=(0, 1), dilation=1, ceil_mode=False)
(conv7): Conv2d(1.05 M, 8.915% Params, 43.01 MMac, 3.409% MACs, 512, 512, kernel_size=(2, 2), stride=(1, 1))
(bn7): BatchNorm2d(1.02 k, 0.009% Params, 41.98 KMac, 0.003% MACs, 512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu7): ReLU(0, 0.000% Params, 20.99 KMac, 0.002% MACs, )
(lstm1): LSTM(1.58 M, 13.401% Params, 64.87 MMac, 5.142% MACs, 512, 256, bidirectional=True)
(fc1): Linear(131.33 k, 1.116% Params, 5.37 MMac, 0.426% MACs, in_features=512, out_features=256, bias=True)
(lstm2): LSTM(1.05 M, 8.945% Params, 43.37 MMac, 3.438% MACs, 256, 256, bidirectional=True)
(fc2): Linear(3.46 M, 29.364% Params, 141.41 MMac, 11.209% MACs, in_features=512, out_features=6736, bias=True)
)
)
\ No newline at end of file
# -*- coding: utf-8 -*-
from torch.serialization import load
from model import *
from extract_ratio import *
from train import parse_arg
from utils import *
from lstm_utils import *
from dataset import get_dataset
from config.alphabets import *
from torch.utils.data import DataLoader
import gol
import openpyxl
......@@ -18,161 +25,117 @@ import torch.utils.bottleneck as bn
import os
import os.path as osp
from torch.utils.tensorboard import SummaryWriter
class objdict(dict):
    def __getattr__(self, name):
        if name in self:
            return self[name]
        else:
            raise AttributeError("No such attribute: " + name)

    def __setattr__(self, name, value):
        self[name] = value

    def __delattr__(self, name):
        if name in self:
            del self[name]
        else:
            raise AttributeError("No such attribute: " + name)
# def direct_quantize(model, test_loader,device):
# for i, (data, target) in enumerate(test_loader, 1):
# data = data.to(device)
# output = model.quantize_forward(data).cpu()
# if i % 500 == 0:
# break
# print('direct quantization finish')
# def full_inference(model, test_loader, device):
# correct = 0
# for i, (data, target) in enumerate(test_loader, 1):
# data = data.to(device)
# output = model(data).cpu()
# pred = output.argmax(dim=1, keepdim=True)
# correct += pred.eq(target.view_as(pred)).sum().item()
# print('\nTest set: Full Model Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)))
# return 100. * correct / len(test_loader.dataset)
# def quantize_inference(model, test_loader, device):
# correct = 0
# for i, (data, target) in enumerate(test_loader, 1):
# data = data.to(device)
# output = model.quantize_inference(data).cpu()
# pred = output.argmax(dim=1, keepdim=True)
# correct += pred.eq(target.view_as(pred)).sum().item()
# print('Test set: Quant Model Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)))
# return 100. * correct / len(test_loader.dataset)
def direct_quantize(config, val_loader, dataset, converter, model, criterion, device):
    model.eval()
    with torch.no_grad():
        for i, (inp, idx) in enumerate(val_loader):
            inp = inp.to(device)
            # inference
            preds = model.quantize_forward(inp)
    print('direct quantization finish')
def full_inference(config, val_loader, dataset, converter, model, criterion, device):
    losses = AverageMeter()
    model.eval()

    n_correct = 0
    with torch.no_grad():
        for i, (inp, idx) in enumerate(val_loader):
            # list of labels for one batch
            labels = get_batch_label(dataset, idx)
            inp = inp.to(device)

            # inference
            preds = model(inp).cpu()

            # compute loss
            batch_size = inp.size(0)
            text, length = converter.encode(labels)
            preds_size = torch.IntTensor([preds.size(0)] * batch_size)
            loss = criterion(preds, text, preds_size, length)
            losses.update(loss.item(), inp.size(0))

            _, preds = preds.max(2)
            preds = preds.transpose(1, 0).contiguous().view(-1)
            # sim_preds holds the decoded result: a str or a list of strs (one batch)
            sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
            # labels is a list, so this effectively compares the strings
            for pred, target in zip(sim_preds, labels):
                if pred == target:
                    n_correct += 1

            # if (i + 1) % config.PRINT_FREQ == 0:
            #     print('Epoch: [{0}][{1}/{2}]'.format(epoch, i, len(val_loader)))

            # only print the comparison for the first config.TEST.NUM_TEST_DISP sentences
            # raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:config.TEST.NUM_TEST_DISP]
            # for raw_pred, pred, gt in zip(raw_preds, sim_preds, labels):
            #     print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    num_test_sample = len(dataset)
    print("[#Full Precision Model: correct:{} / #total:{}]".format(n_correct, num_test_sample))
    accuracy = n_correct / float(num_test_sample)
    print('Full Precision Model: Test loss: {:.4f}, accuracy: {:.4f}'.format(losses.avg, accuracy))
    return accuracy
def quantize_inference(config, val_loader, dataset, converter, model, criterion, device):
    losses = AverageMeter()
    model.eval()

    n_correct = 0
    with torch.no_grad():
        for i, (inp, idx) in enumerate(val_loader):
            # list of labels for one batch
            labels = get_batch_label(dataset, idx)
            inp = inp.to(device)

            # inference
            preds = model.quantize_inference(inp).cpu()

            # compute loss
            batch_size = inp.size(0)
            text, length = converter.encode(labels)
            preds_size = torch.IntTensor([preds.size(0)] * batch_size)
            loss = criterion(preds, text, preds_size, length)
            losses.update(loss.item(), inp.size(0))

            _, preds = preds.max(2)
            preds = preds.transpose(1, 0).contiguous().view(-1)
            # sim_preds holds the decoded result: a str or a list of strs (one batch)
            sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
            # labels is a list, so this effectively compares the strings
            for pred, target in zip(sim_preds, labels):
                if pred == target:
                    n_correct += 1

            # if (i + 1) % config.PRINT_FREQ == 0:
            #     print('Epoch: [{0}][{1}/{2}]'.format(epoch, i, len(val_loader)))

            # only print the comparison for the first config.TEST.NUM_TEST_DISP sentences
            # raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:config.TEST.NUM_TEST_DISP]
            # for raw_pred, pred, gt in zip(raw_preds, sim_preds, labels):
            #     print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    num_test_sample = len(dataset)
    print("[#Quantization Model: correct:{} / #total:{}]".format(n_correct, num_test_sample))
    accuracy = n_correct / float(num_test_sample)
    print('Quantization Model: Test loss: {:.4f}, accuracy: {:.4f}'.format(losses.avg, accuracy))
    return accuracy
......@@ -190,86 +153,79 @@ def js_div(p_output, q_output, get_softmax=True):
if __name__ == "__main__":
# load config
config = parse_arg()
    # construct the OCR network
    model = get_crnn(config)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # define loss function
    criterion = torch.nn.CTCLoss()
train_dataset = get_dataset(config)(config, is_train=True)
train_loader = DataLoader(
dataset=train_dataset,
batch_size=config.TRAIN.BATCH_SIZE_PER_GPU,
shuffle=config.TRAIN.SHUFFLE,
num_workers=config.WORKERS,
pin_memory=config.PIN_MEMORY,
)
val_dataset = get_dataset(config)(config, is_train=False)
val_loader = DataLoader(
dataset=val_dataset,
batch_size=config.TEST.BATCH_SIZE_PER_GPU,
shuffle=config.TEST.SHUFFLE,
num_workers=config.WORKERS,
pin_memory=config.PIN_MEMORY,
)
converter = strLabelConverter(config.DATASET.ALPHABETS)
    writer = SummaryWriter(log_dir='log/' + config.MODEL.NAME + '/ptq')

    full_file = 'ckpt/360cc_' + config.MODEL.NAME + '.pt'
model.load_state_dict(torch.load(full_file))
model.to(device)
    load_ptq = True
    ptq_file_prefix = 'ckpt/360cc_' + config.MODEL.NAME + '_ptq_'
model.eval()
    full_acc = full_inference(config, val_loader, val_dataset, converter, model, criterion, device)
    model_fold = fold_model(model)  # yields the folded conv, bn, relu and fc layers
    full_params = []

    layer, par_ratio, flop_ratio = extract_ratio('lstm-ocr')

    layer = []
    for name, param in model.named_parameters():
        # take the part of the name in front of 'weight'/'bias' (i.e. the layer name)
        n = name.split('.')
        pre = '.'.join(n[:len(n) - 1])
        # avoid adding duplicates
        if pre not in layer:
            layer.append(pre)
    par_ratio, flop_ratio = fold_ratio(layer, par_ratio, flop_ratio)

    for name, param in model_fold.named_parameters():
        if 'bn' in name or 'sample.1' in name:
            continue
        param_data = param.data.cpu()
        full_params.append(param_data)  # collects the weights and biases of the folded conv and fc layers (bn excluded)
        writer.add_histogram(tag='Full_' + name + '_data', values=param.data)
gol._init()
......@@ -291,10 +247,10 @@ if __name__ == "__main__":
for num_bits in num_bit_list:
e_bit_list = ebit_list(quant_type,num_bits)
for e_bits in e_bit_list:
                model_ptq = get_crnn(config)
                device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
                model_ptq = model_ptq.to(device)
if quant_type == 'FLOAT':
title = '%s_%d_E%d' % (quant_type, num_bits, e_bits)
......@@ -319,59 +275,62 @@ if __name__ == "__main__":
model_ptq.to(device)
model_ptq.quantize(quant_type,num_bits,e_bits)
model_ptq.eval()
                direct_quantize(config, val_loader, val_dataset, converter, model_ptq, criterion, device)
# if args.save == True:
# torch.save(model_ptq.state_dict(), ptq_file_prefix + title + '.pt')
model_ptq.freeze()
                ptq_acc = quantize_inference(config, val_loader, val_dataset, converter, model_ptq, criterion, device)
                ptq_acc_list.append(ptq_acc)
                acc_loss = (full_acc - ptq_acc) / full_acc
                acc_loss_list.append(acc_loss)

                idx = -1
                # dequantize the weight parameters
                model_ptq.fakefreeze()
                # accumulate the JS divergence weighted by FLOPs / parameter count
                js_flops = 0.
                js_param = 0.
                for name, param in model_ptq.named_parameters():
                    if 'bn' in name or 'sample.1' in name:
                        continue
                    idx = idx + 1
                    # names can contain several '.', so rebuild the layer prefix by joining
                    n = name.split('.')
                    prefix = '.'.join(n[:len(n) - 1])
                    # the ratio is assigned per layer; weight and bias could also be given
                    # separate weights here (e.g. 8:2) instead of sharing the layer's ratio.
                    # layer only contains conv, bn and fc, so prefix can only be a conv or
                    # fc layer (in order, with both weight and bias)
                    if prefix in layer:
                        layer_idx = layer.index(prefix)
                        ptq_param = param.data.cpu()
                        writer.add_histogram(tag=title + ':' + name + '_data', values=ptq_param)
                        # full_params only holds the folded conv and fc parameters, in the
                        # same order (weight then bias), so idx lines up with it
                        js = js_div(ptq_param, full_params[idx])  # JS distance between the folded model before and after quantization
                        js = js.item()
                        if js < 0.:
                            js = 0.
                        js_flops = js_flops + js * flop_ratio[layer_idx]
                        js_param = js_param + js * par_ratio[layer_idx]
                        print(f"prefix:{prefix}, layer_idx:{layer_idx}")
                js_flops_list.append(js_flops)
                js_param_list.append(js_param)

                print(title + ': js_flops: %f js_param: %f acc_loss: %f' % (js_flops, js_param, acc_loss))
sys.exit()
    # write the results to xlsx
workbook = openpyxl.Workbook()
worksheet = workbook.active
......@@ -389,10 +348,10 @@ if __name__ == "__main__":
worksheet.cell(row=i+4, column=4, value=ptq_acc_list[i])
worksheet.cell(row=i+4, column=5, value=acc_loss_list[i])
    workbook.save('ptq_result_' + config.MODEL.NAME + '.xlsx')
writer.close()
    ft = open('ptq_result_' + config.MODEL.NAME + '.txt', 'w')
print('title_list:',file=ft)
print(" ".join(title_list),file=ft)
......
title_list:
INT_2 INT_3 INT_4 INT_5 INT_6 INT_7 INT_8 INT_9 INT_10 INT_11 INT_12 INT_13 INT_14 INT_15 INT_16 POT_2 POT_3 POT_4 POT_5 POT_6 POT_7 POT_8 FLOAT_3_E1 FLOAT_4_E1 FLOAT_4_E2 FLOAT_5_E1 FLOAT_5_E2 FLOAT_5_E3 FLOAT_6_E1 FLOAT_6_E2 FLOAT_6_E3 FLOAT_6_E4 FLOAT_7_E1 FLOAT_7_E2 FLOAT_7_E3 FLOAT_7_E4 FLOAT_7_E5 FLOAT_8_E1 FLOAT_8_E2 FLOAT_8_E3 FLOAT_8_E4 FLOAT_8_E5 FLOAT_8_E6
js_flops_list:
938.7733335552051 422.2961529040284 112.00572935619252 28.15632049948652 6.580025053557225 1.593147596698573 0.39127389747221836 0.09726359211545822 0.02424534011691382 0.006096811349829773 0.0015819717562818567 0.0004088973671629054 0.00010591612996765123 4.215430786749398e-05 3.358851529044255e-05 938.0795941095593 276.1298308688724 43.56510410351131 43.31743097440866 43.31651992748872 43.317170796987085 43.316909690885424 245.27340648023244 80.55429874312716 50.01809924502505 38.66623068838267 17.323188075553762 12.265772098808744 26.78403199897619 9.167642811282255 3.1779184189380314 12.23167946700013 22.667666751272108 6.595860316723723 0.8016993676302422 3.1649891963442665 12.231686340919987 21.030391880048075 5.630180110297391 0.20824827230948417 0.7939111200807045 3.164995719146672 12.23168958358928
js_param_list:
1083.2152054453256 407.68102883090796 142.77271018968833 40.694342955319094 9.528033474559528 2.3056188772063395 0.5663514708991146 0.1405112916168392 0.03504902772537398 0.008813186313906787 0.0022363630716486275 0.000566627677334268 0.00014559649495622196 4.1442517690921754e-05 3.062477331841085e-05 1083.0293894297583 286.18087410960214 48.04799916302135 47.56581240023642 47.5640587789577 47.565362072997914 47.56507051619791 306.72068771644035 110.02866499298202 52.861756413549614 51.56753626886545 17.228751722832826 13.602914776865827 35.21196010598172 8.706064889198597 3.4432562752359903 13.53603392058786 29.65906818846962 6.118510949236464 0.8693586052802382 3.417883320602458 13.536039876528541 27.482646086738573 5.183734622246258 0.22716556760816775 0.8541492170710019 3.417896091306703 13.536039813988335
ptq_acc_list:
0.0 0.0 0.0 0.172 0.627 0.772 0.7856666666666666 0.791 0.791 0.7936666666666666 0.793 0.7936666666666666 0.794 0.7936666666666666 0.794 0.0 0.0 0.087 0.09433333333333334 0.086 0.09533333333333334 0.09366666666666666 0.0 0.001 0.0033333333333333335 0.18733333333333332 0.343 0.591 0.4836666666666667 0.5843333333333334 0.7566666666666667 0.5906666666666667 0.557 0.657 0.7823333333333333 0.7503333333333333 0.5883333333333334 0.597 0.6843333333333333 0.792 0.785 0.7486666666666667 0.5933333333333334
acc_loss_list:
1.0 1.0 1.0 0.7834662190516157 0.21065883340327318 0.02811582039446074 0.010910616869492292 0.004196391103650818 0.004196391103650818 0.0008392782207302194 0.001678556441460299 0.0008392782207302194 0.00041963911036503983 0.0008392782207302194 0.00041963911036503983 1.0 1.0 0.8904741921947126 0.8812421317666806 0.8917331095258079 0.8799832144355854 0.8820814099874108 1.0 0.9987410826689047 0.9958036088963492 0.7641628199748216 0.5681913554343264 0.2559798573227025 0.39110365086026017 0.26437263953000417 0.04741921947125467 0.25639949643306753 0.2987830465799412 0.17289131347041542 0.01510700797314311 0.055392362568191404 0.25933697020562313 0.24842635333613097 0.13848090642047836 0.002937473772555558 0.011749895090222373 0.057490558120016744 0.2530423835501468
## update 2023.5.14
### LSTM-OCR is now implemented with the CRNN structure, which handles OCR problems better and more generically than the previous version. I used the 360CC dataset (a Chinese OCR training set with 3.6M images in total, of which I only used 30k for training and 3k for testing) to train and test the model.
1. The config folder holds the configuration files:
 - alphabets.py is the character dictionary
 - 360CC_config.yaml is the configuration used with the 360CC dataset
 - OWN_config.yaml is the configuration used with other datasets
2. The dataset folder:
 - char_std_5990.txt in the txt folder is the dictionary of the 360CC dataset
 - test.txt configures the test data
 - train.txt configures the training data
 - _360cc.py builds the 360CC dataset for training/testing
 - _own.py builds your own dataset for training/testing
3. lstm_utils.py contains the functions and classes specific to LSTM-OCR
4. module.py adds the components for LSTM quantization, fake-quantizes part of the BiLSTM weight parameters, and changes QLinear's quantize_inference so that it still performs true quantization.
5. The train and validate functions in train.py were reworked substantially to account for the characteristics of OCR; direct_quantize, full_inference and quantize_inference in ptq.py were adjusted accordingly.
6. Since QLSTM uses fake quantization while the other layers use true quantization, the layers directly connected to QLSTM in model.py need to pass their data through dequantize_tensor(x) or quantize_tensor(x); see the sketch below the figures.
7. ptq_result_lstm-ocr.txt and ptq_result_lstm-ocr.xlsx hold the experimental results; accuracy remains the metric, where a sentence counts as correct only when it is recognized exactly.
8. The fitted curves are shown below <br>
flops:
<img src = "fig/flops_lstmocr.png" class="h-90 auto">
params:
<img src = "fig/params_lstmocr.png" class="h-90 auto">
## update 2023.5.4
......
import argparse
from easydict import EasyDict as edict
import yaml
import os
import torch
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader
from model import *
from utils import *
from lstm_utils import *
from dataset import get_dataset
from config.alphabets import *
import time
import sys
from torch.utils.tensorboard import SummaryWriter
def parse_arg():
parser = argparse.ArgumentParser(description="train crnn")
# parser.add_argument('--cfg', help='experiment configuration filename', required=True, type=str)
parser.add_argument('--cfg', help='experiment configuration filename', type=str, default='config/360CC_config.yaml')
args = parser.parse_args()
with open(args.cfg, 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
config = edict(config)
config.DATASET.ALPHABETS = alphabet
config.MODEL.NUM_CLASSES = len(config.DATASET.ALPHABETS)
return config
def train(config, train_loader, dataset, converter, model, criterion, optimizer, device, epoch):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    model.train()
    end = time.time()
    for i, (inp, idx) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        labels = get_batch_label(dataset, idx)
        inp = inp.to(device)

        # inference
        preds = model(inp).cpu()

        # compute loss
        batch_size = inp.size(0)
        # length = total number of characters in the batch;
        # text = the dictionary indices of those characters
        text, length = converter.encode(labels)
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)  # timestep * batchsize
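        # CTCLoss consumes log-probabilities of shape (T, N, C), the flattened
        # target indices, per-sample input lengths (all equal to T here) and
        # per-sample target lengths.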
        loss = criterion(preds, text, preds_size, length)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        losses.update(loss.item(), inp.size(0))
        batch_time.update(time.time() - end)
        if i % config.PRINT_FREQ == 0:
            msg = 'Epoch: [{0}][{1}/{2}]\t' \
                  'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
                  'Speed {speed:.1f} samples/s\t' \
                  'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
                  'Loss {loss.val:.5f} ({loss.avg:.5f})\t'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      speed=inp.size(0) / batch_time.val,
                      data_time=data_time, loss=losses)
            print(msg)
        end = time.time()
def validate(config, val_loader, dataset, converter, model, criterion, device, epoch):
    losses = AverageMeter()
    model.eval()

    n_correct = 0
    with torch.no_grad():
        for i, (inp, idx) in enumerate(val_loader):
            # list of labels for one batch
            labels = get_batch_label(dataset, idx)
            inp = inp.to(device)

            # inference
            preds = model(inp).cpu()

            # compute loss
            batch_size = inp.size(0)
            text, length = converter.encode(labels)
            preds_size = torch.IntTensor([preds.size(0)] * batch_size)
            loss = criterion(preds, text, preds_size, length)
            losses.update(loss.item(), inp.size(0))

            # greedy decoding: take the argmax class at every timestep
            _, preds = preds.max(2)
            preds = preds.transpose(1, 0).contiguous().view(-1)
            # sim_preds holds the decoded result (a str, or a str list for the batch)
            sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
            # labels is a list; this is a plain string-equality check, so a sample
            # only counts as correct when the whole sentence matches
            for pred, target in zip(sim_preds, labels):
                if pred == target:
                    n_correct += 1

            if (i + 1) % config.PRINT_FREQ == 0:
                print('Epoch: [{0}][{1}/{2}]'.format(epoch, i, len(val_loader)))

            # if i == config.TEST.NUM_TEST_BATCH:  # only check config.TEST.NUM_TEST_BATCH batches
            #     break

    # only display the first config.TEST.NUM_TEST_DISP prediction/ground-truth pairs
    raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:config.TEST.NUM_TEST_DISP]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, labels):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    # num_test_sample = config.TEST.NUM_TEST_BATCH * config.TEST.BATCH_SIZE_PER_GPU
    # if num_test_sample > len(dataset):
    #     num_test_sample = len(dataset)
    num_test_sample = len(dataset)
    print("[#correct:{} / #total:{}]".format(n_correct, num_test_sample))
    accuracy = n_correct / float(num_test_sample)
    print('Test loss: {:.4f}, accuracy: {:.4f}'.format(losses.avg, accuracy))
    return accuracy
def main():
    # load config
    config = parse_arg()

    # construct the CRNN network
    model = get_crnn(config)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    # define loss function
    criterion = torch.nn.CTCLoss()
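    # torch.nn.CTCLoss treats index 0 as the blank label by default, so the
    # label converter is assumed to map characters to indices starting at 1.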
    # epoch where the previous training run stopped (starts at 0)
    last_epoch = config.TRAIN.BEGIN_EPOCH

    train_dataset = get_dataset(config)(config, is_train=True)
    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU,
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY,
    )

    val_dataset = get_dataset(config)(config, is_train=False)
    val_loader = DataLoader(
        dataset=val_dataset,
        batch_size=config.TEST.BATCH_SIZE_PER_GPU,
        shuffle=config.TEST.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY,
    )

    optimizer = get_optimizer(config, model)
    if isinstance(config.TRAIN.LR_STEP, list):
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, config.TRAIN.LR_STEP,
            config.TRAIN.LR_FACTOR, last_epoch - 1
        )
    else:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, config.TRAIN.LR_STEP,
            config.TRAIN.LR_FACTOR, last_epoch - 1
        )

    save_dir = 'ckpt'
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir, mode=0o777)
        os.chmod(save_dir, mode=0o777)

    converter = strLabelConverter(config.DATASET.ALPHABETS)

    # resume mode: only evaluate the saved checkpoint, then exit
    if config.TRAIN.RESUME.IS_RESUME:
        model.load_state_dict(torch.load(save_dir + '/360cc_' + config.MODEL.NAME + '.pt'))
        acc = validate(config, val_loader, val_dataset, converter, model, criterion, device, config.TRAIN.END_EPOCH)
        print(f"test accuracy: {100 * acc:.2f}%")
        sys.exit()

    best_acc = 0.0
    for epoch in range(last_epoch, config.TRAIN.END_EPOCH):
        # train
        train(config, train_loader, train_dataset, converter, model, criterion, optimizer, device, epoch)
        lr_scheduler.step()

        # validate
        acc = validate(config, val_loader, val_dataset, converter, model, criterion, device, epoch)

        # save the best checkpoint
        if config.TRAIN.SAVE and acc > best_acc:
            torch.save(model.state_dict(), save_dir + '/360cc_' + config.MODEL.NAME + '.pt')
        best_acc = max(acc, best_acc)
        print("best acc is:", best_acc)


if __name__ == '__main__':
    main()
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
def js_div(p_output, q_output, get_softmax=True):
"""
......@@ -167,3 +169,5 @@ def fold_bn(conv, bn):
return conv