python - 文件按条件将多行合并为一行?
怪我咯
怪我咯 2017-04-17 17:35:21
0
2
2033

在这个test文件里。如果ID和DATE都相等,那么把他们合并到一行。
ID,NAME,GENDER,AGE,KESHI,DATE,TYPE,DIAG保留第一行的值就好,PROJ_1至RE_6有值的话就依次往后面添加成一行。也就是ID和DATE都相同的话,就合并成一行。

test文件地址:http://pan.baidu.com/s/1slP4wsX

plus:
最最最期望得到的结果为:

谢谢大家的帮助!

怪我咯
怪我咯

走同样的路,发现不同的人生

reply all(2)
Ty80

Implemented one using Perl6

#!/usr/bin/env perl6

# One CSV record: eight fixed columns plus a map of PROJ => RE pairs.
#
# Records that describe the same ID on the same DATE share a hash-key and
# can be folded together with .meger.
class StudentInfo {
    has Str $.id;
    has Str $.name;
    has Str $.gender;
    has Str $.age;
    has Str $.keshi;
    has Str $.date;
    has Str $.type;
    has Str $.diag;
    has %.index;        # PROJ name => RE value

    # Build a record straight from one raw, comma-separated line.
    method new (Str $line) {
        self.bless(|self.parse-line($line));
    }

    # Split a line into the named arguments expected by bless:
    # the eight fixed columns plus `index`, the hash of PROJ => RE pairs.
    method parse-line(Str $line) {
        my @items = $line.split: ',';
        my @title = < id name gender age keshi date type diag >;
        my %hash;

        # Everything after the fixed columns is a run of (PROJ, RE) pairs.
        # Stop before a dangling last cell so @items[$i + 1] always exists.
        loop (my $i = +@title; $i + 1 < +@items; $i += 2) {
            # Drop pairs where either the PROJ or the RE cell is empty.
            if @items[$i] ne "" && @items[$i + 1] ne "" {
                # The first value seen for a PROJ wins.
                unless %hash{@items[$i]}:exists {
                    %hash{@items[$i]} = @items[$i + 1];
                }
            }
        }
        my %ret = @title Z=> @items[0 .. +@title - 1];    # fixed header columns

        %ret<index> := %hash;

        return %ret;
    }

    # Number of distinct PROJ entries held by this record.
    method num-of-proj() {
        +%!index.keys;
    }

    # Grouping key: records are merged when both ID and DATE are equal.
    # Fields come from a split on ',', so they can never contain a comma and
    # the separator keeps ("1", "23") distinct from ("12", "3").
    method hash-key() {
        return $!id ~ ',' ~ $!date;
    }

    # Fold another record's PROJ/RE pairs into this one; values already
    # present here are kept. (Name kept as-is, spelling included, for callers.)
    method meger(::?CLASS:D: $other) {
        for $other.index.keys -> $key {
            unless %!index{$key}:exists {
                %!index{$key} = $other.index{$key};
            }
        }
    }

    # Render the record as one CSV line.
    # $max - number of PROJ/RE column pairs to emit; pairs are written in
    #        sorted PROJ order and missing ones are padded with empty cells.
    method generate($max) {
        my @line = ($!id, $!name, $!gender, $!age, $!keshi, $!date, $!type, $!diag);

        # Assign (not bind) so @keys is a real Array regardless of what
        # .sort returns.
        my @keys = %!index.keys.sort;

        for ^$max -> $i {
            if $i < @keys.elems {
                @line.append: (~@keys[$i], ~%!index{@keys[$i]});
            } else {
                @line.append: ('', '');
            }
        }
        return @line.join(',');
    }
}

# Command-line options:
#    o    write the result (with a header line) to this file
#    a    append the result (no header line) to this file
#    i    number of PROJ/RE column pairs per output row
#    d    debug: dump every parsed and merged record to stderr
#
# Reads each CSV file given on the command line, merges the records that
# share a StudentInfo.hash-key, and writes one line per merged record to
# the chosen file, or to standard output when neither -o nor -a is given.
sub MAIN(Str :o(:output($out))?, 
    Str :a(:append-to($append))?, 
    Int :i(:index-max($index)) = 8,
    Bool :d(:debug($debug)) = False, 
    *@files) {
    my %info;
    my @order;      # hash keys in first-seen order; Hash iteration order is not defined
    my @title;

    for @files -> $file {
        my @lines = $file.IO.lines;
        next unless @lines;                 # empty file: nothing to shift

        @title = @lines.shift.split: ',';   # first line of each file is its header

        for @lines -> $line {
            next if $line.trim eq '';       # ignore blank lines

            my StudentInfo $si .= new(~$line);

            note $si.perl if $debug;

            my $key = $si.hash-key;
            if %info{$key}:exists {
                %info{$key}.meger($si);
            } else {
                %info{$key} = $si;
                @order.push: $key;
            }
        }
    }

    if $debug {
        .note for %info{@order};
    }

    # Header: the first eight input columns followed by PROJ_n,RE_n pairs.
    @title = @title[^8];
    @title.append: (< PROJ_ RE_ > xx $index).flat Z~ ((1 ... $index) xx 2).flat.sort;

    my $to-file = defined($append) || defined($out);
    my $out-fh  = $*OUT;
    if defined($append) {
        $out-fh = $append.IO.open(:a);      # -a takes precedence over -o
    } elsif defined($out) {
        $out-fh = $out.IO.open(:w);
    }

    $out-fh.say(@title.join(',')) if defined($out);

    for %info{@order} -> $value {
        # NOTE(review): only records with at least $index PROJ entries are
        # written, so the empty-cell padding in generate is never exercised
        # from here - confirm this filter is intended.
        $out-fh.say: $value.generate($index)
            if $value.num-of-proj >= $index;
    }

    $out-fh.close if $to-file;              # never close standard output
}

How to use

[root@localhost tmp]# ./meger.p6 --help
Usage:
  ./meger.p6 [-o|--output=<Str>] [-a|--append-to=<Str>] [-i|--index-max=<Int>] [-d|--debug] [<files> ...]  
[root@localhost tmp]# ./meger.p6 -o=out.put.log testSheet.csv testSheet.csv 
[root@localhost tmp]# cat out.put.log 
ID,NAME,GENDER,AGE,KESHI,DATE,TYPE,DIAG,PROJ_1,RE_1,PROJ_2,RE_2,PROJ_3,RE_3,PROJ_4,RE_4,PROJ_5,RE_5,PROJ_6,RE_6,PROJ_7,RE_7,PROJ_8,RE_8
179802,彭永彪,男,82,神经内科,2013/1/1,血清,脑梗塞,ALP,88,ALT,8,AST,18,DBIL,3.4,GGT,32,IBIL,8.9,TBIL,12.3,TP,59.4
179099,王元家,男,39,手足显微外科,2013/1/1,血清,足外伤,ALP,58,ALT,32,AST,19,DBIL,2.1,GGT,44,IBIL,12.2,TBIL,14.3,TP,61.5
181012,潘国华,男,94,心肺血管科,2013/1/1,血清,高血压,ALP,84,ALT,10,AST,13,DBIL,1.5,GGT,34,IBIL,2.6,TBIL,4.1,TP,52.8
180813,朱安清,男,40,骨二科,2013/1/1,血清,足外伤,ALP,73,ALT,36,AST,19,DBIL,3.9,GGT,24,IBIL,18.6,TBIL,22.5,TP,59.6
180188,刘田英,女,80,综合一科,2013/1/1,血清,高血压,ALP,92,ALT,25,AST,24,DBIL,3,GGT,78,IBIL,7.3,TBIL,10.3,TP,64.9
178748,邓晓运,女,77,肿瘤科,2013/1/1,血清,脑梗塞,ALP,48,ALT,13,AST,16,DBIL,1.8,GGT,48,IBIL,6.1,TBIL,7.9,TP,63.3
180974,王龙,男,25,骨一科,2013/1/1,血清,肱骨干骨折,ALP,82,ALT,34,AST,36,DBIL,4.1,GGT,38,IBIL,11.6,TBIL,15.7,TP,61.7
180940,赵金成,男,79,综合一科,2013/1/1,血清,冠心病,ALP,66,ALT,54,AST,26,DBIL,5.5,GGT,30,IBIL,12.1,TBIL,17.6,TP,59
181168,张永堂,男,63,骨二科,2013/1/1,血清,肱骨干骨折,ALP,59,ALT,25,AST,35,DBIL,5.2,GGT,33,IBIL,14.4,TBIL,19.6,TP,57.1
[root@localhost tmp]# 
Ty80

You can put the records into a dictionary keyed by ID and DATE. If the key is not in the dictionary yet, store the row as-is. If it already exists, append the row's "PROJ_1 to RE_6" cells that have values.

I’m not sure what you mean by “merging into one line”, but the general code can be similar to the following. If the merging method is wrong, you can modify it yourself:

import csv

# Merge CSV rows that share the same ID (column 0) and DATE (column 5).
# The first row seen for a key supplies the eight fixed columns (ID..DIAG);
# the non-empty PROJ/RE pairs of that row and of every later row with the
# same key are appended after them, in input order.
result = {}
# newline='' is what the csv module documentation recommends for files
# handed to csv.reader.
with open('/Volumes/MacDocuments/Downloads/testSheet.csv', 'r', newline='') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        # Blank or truncated lines have no DATE column to key on.
        if len(row) < 6:
            continue
        key = '{}-{}'.format(row[0], row[5])
        extras = row[8:]
        # Keep a (PROJ, RE) pair only when at least one of its cells has a
        # value, so merged rows are not padded with empty cells.
        values = [
            cell
            for i in range(0, len(extras), 2)
            if any(extras[i:i + 2])
            for cell in extras[i:i + 2]
        ]
        if key in result:
            result[key].extend(values)
        else:
            result[key] = row[:8] + values

print(result)
Latest Downloads
More>
Web Effects
Website Source Code
Website Materials
Front End Template