python - 文件按条件将多行合并为一行？

Question

在这个test文件里。如果ID和DATE都相等，那么把他们合并到一行。ID,NAME,GENDER,AGE,KESHI,DATE,TYPE,DIAG保留第一行的值就好，PROJ_1至RE_6有值的话就依次往后面添加成一行。也就是ID和DATE都相同的话，就合并成...

PHP中文网 · Answer

Here is an implementation in Perl 6 (Raku):

#!/usr/bin/env perl6

# One CSV record: eight fixed header fields (id .. diag) plus a map of
# PROJ name => RE value collected from the remaining columns.
class StudentInfo {
    has Str $.id;
    has Str $.name;
    has Str $.gender;
    has Str $.age;
    has Str $.keshi;
    has Str $.date;
    has Str $.type;
    has Str $.diag;
    has %.index;    # PROJ name => RE value

    # Build a record from one raw comma-separated line.
    method new (Str $line) {
        self.bless(|self.parse-line($line));
    }

    # Split $line on ',' and return a hash of constructor arguments:
    # the eight header fields keyed by attribute name, plus 'index'
    # holding the PROJ => RE map.
    method parse-line(Str $line) {
        my @items = $line.split: ',';
        my @title = < id name gender age keshi date type diag >;
        my %hash;

        # Columns after the header come in (PROJ, RE) pairs. The "$i + 1"
        # bound keeps a dangling unpaired trailing cell from being read
        # past the end of @items.
        loop (my $i = +@title; $i + 1 < +@items; $i += 2) {
            my $proj = @items[$i];
            my $re   = @items[$i + 1];

            # Drop pairs with an empty PROJ or RE; the first value seen
            # for a PROJ wins.
            if $proj ne "" && $re ne "" {
                unless %hash{$proj}:exists {
                    %hash{$proj} = $re;
                }
            }
        }

        # Pair the attribute names with the leading header columns.
        my %ret = @title Z=> @items[0 .. +@title - 1];

        # Store the PROJ/RE map under the 'index' key. Rebinding %ret
        # itself would discard the header fields built above.
        %ret{'index'} := %hash;

        return %ret;
    }

    # Number of distinct PROJ entries held by this record.
    method num-of-proj() {
        +%!index.keys;
    }

    # Grouping key: records sharing both ID and DATE are merged together.
    # ',' is a safe separator because fields were split on it.
    method hash-key() {
        return $!id ~ ',' ~ $!date;
    }

    # Copy PROJ/RE pairs from $other that this record does not have yet.
    # Existing entries and the header fields are left untouched.
    method meger(::?CLASS:D: $other) {
        for $other.index.keys -> $key {
            unless %!index{$key}:exists {
                %!index{$key} = $other.index{$key};
            }
        }
    }

    # Render the record as one CSV line.
    # $max - number of PROJ/RE column pairs to emit; missing pairs are
    #        written as empty cells, surplus pairs are not written.
    method generate($max) {
        my @line = ($!id, $!name, $!gender, $!age, $!keshi, $!date, $!type, $!diag);

        # PROJ names in sorted order so the column layout is stable.
        my @keys = %!index.keys.sort;

        loop (my Int $i = 0; $i < $max; $i++) {
            if $i < @keys.elems {
                @line.append: (~@keys[$i], ~%!index{@keys[$i]});
            } else {
                @line.append: ('', '');
            }
        }
        return @line.join(',');
    }
}

# Command-line options:
#    -o / --output       write the header line and merged rows to this file
#    -a / --append-to    append merged rows to this file (wins over -o for
#                        the choice of file when both are given)
#    -i / --index-max    number of PROJ/RE column pairs per output row
#    -d / --debug        dump parsed and merged records to stderr
#
# Every remaining argument is an input CSV file whose first line is the header.
sub MAIN(Str :o(:output($out))?, 
    Str :a(:append-to($append))?, 
    Int :i(:index-max($index)) = 8,
    Bool :d(:debug($debug)) = False, 
    *@files) {
    my %info;     # hash-key => merged StudentInfo
    my @order;    # hash-keys in first-seen order, so output order is stable
    my @title;

    for @files -> $file {
        my @lines = $file.IO.lines;

        # An empty file has no header line to shift off; skip it.
        next unless @lines;

        @title = @lines.shift.split: ',';

        for @lines -> $line {
            # A blank line would otherwise become a record with no fields.
            next unless $line.trim.chars;

            my StudentInfo $si .= new(~$line);

            note $si.perl if $debug;

            my $key = $si.hash-key;
            if %info{$key}:exists {
                %info{$key}.meger($si);
            } else {
                %info{$key} := $si;
                @order.push: $key;
            }
        }
    }

    # Merged records in the order their keys were first seen.
    my @records = @order.map(-> $key { %info{$key} });

    if $debug {
        .note for @records;
    }

    # Keep the eight fixed header columns, then add PROJ_1,RE_1 .. PROJ_n,RE_n.
    @title = @title[^8];
    @title.append: (< PROJ_ RE_ > xx $index).flat Z~ ((1 ... $index) xx 2).flat.sort;

    # Only records holding at least $index PROJ/RE pairs are emitted.
    my @rows = @records.grep(*.num-of-proj >= $index).map(*.generate($index));

    if defined($append) || defined($out) {
        my $out-fh =  defined($append) ?? $append.IO.open(:a) !! $out.IO.open(:w);

        $out-fh.say(@title.join(',')) if defined($out);

        $out-fh.say($_) for @rows;

        $out-fh.close;
    } else {
        .say for @rows;
    }
}

How to use

[root@localhost tmp]# ./meger.p6 --help
Usage:
  ./meger.p6 [-o|--output=<Str>] [-a|--append-to=<Str>] [-i|--index-max=<Int>] [-d|--debug] [<files> ...]
[root@localhost tmp]# ./meger.p6 -o=out.put.log testSheet.csv testSheet.csv 
[root@localhost tmp]# cat out.put.log 
ID,NAME,GENDER,AGE,KESHI,DATE,TYPE,DIAG,PROJ_1,RE_1,PROJ_2,RE_2,PROJ_3,RE_3,PROJ_4,RE_4,PROJ_5,RE_5,PROJ_6,RE_6,PROJ_7,RE_7,PROJ_8,RE_8
179802,彭永彪,男,82,神经内科,2013/1/1,血清,脑梗塞,ALP,88,ALT,8,AST,18,DBIL,3.4,GGT,32,IBIL,8.9,TBIL,12.3,TP,59.4
179099,王元家,男,39,手足显微外科,2013/1/1,血清,足外伤,ALP,58,ALT,32,AST,19,DBIL,2.1,GGT,44,IBIL,12.2,TBIL,14.3,TP,61.5
181012,潘国华,男,94,心肺血管科,2013/1/1,血清,高血压,ALP,84,ALT,10,AST,13,DBIL,1.5,GGT,34,IBIL,2.6,TBIL,4.1,TP,52.8
180813,朱安清,男,40,骨二科,2013/1/1,血清,足外伤,ALP,73,ALT,36,AST,19,DBIL,3.9,GGT,24,IBIL,18.6,TBIL,22.5,TP,59.6
180188,刘田英,女,80,综合一科,2013/1/1,血清,高血压,ALP,92,ALT,25,AST,24,DBIL,3,GGT,78,IBIL,7.3,TBIL,10.3,TP,64.9
178748,邓晓运,女,77,肿瘤科,2013/1/1,血清,脑梗塞,ALP,48,ALT,13,AST,16,DBIL,1.8,GGT,48,IBIL,6.1,TBIL,7.9,TP,63.3
180974,王龙,男,25,骨一科,2013/1/1,血清,肱骨干骨折,ALP,82,ALT,34,AST,36,DBIL,4.1,GGT,38,IBIL,11.6,TBIL,15.7,TP,61.7
180940,赵金成,男,79,综合一科,2013/1/1,血清,冠心病,ALP,66,ALT,54,AST,26,DBIL,5.5,GGT,30,IBIL,12.1,TBIL,17.6,TP,59
181168,张永堂,男,63,骨二科,2013/1/1,血清,肱骨干骨折,ALP,59,ALT,25,AST,35,DBIL,5.2,GGT,33,IBIL,14.4,TBIL,19.6,TP,57.1
[root@localhost tmp]#

PHP中文网 · Answer

You can put the records into a dictionary keyed by ID and DATE. If the key is not yet in the dictionary, store the row directly. If it already exists, append the row's PROJ_1 to RE_6 cells that have values.

I’m not sure what you mean by “merging into one line”, but the general code can be similar to the following. If the merging method is wrong, you can modify it yourself:

import csv

# Number of leading fixed columns (ID .. DIAG) before the PROJ/RE pairs.
FIXED_COLUMNS = 8


def _non_empty_pairs(cells):
    """Return the cells of every (PROJ, RE) pair in `cells` whose two halves
    are both non-empty, flattened back into a single list.

    A dangling unpaired trailing cell is ignored.
    """
    kept = []
    for proj, value in zip(cells[0::2], cells[1::2]):
        if proj != '' and value != '':
            kept.extend((proj, value))
    return kept


# Maps "ID-DATE" to the merged row: the fixed columns of the first row seen
# for that key, followed by every non-empty PROJ/RE pair from all its rows.
result = {}
# newline='' is what the csv module asks for when opening files for a reader.
with open('/Volumes/MacDocuments/Downloads/testSheet.csv', 'r', newline='') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        print(row)
        print(len(row))
        # Blank lines come back as [] and short rows lack the DATE column;
        # neither can form a key, so skip them instead of raising IndexError.
        if len(row) < 6:
            continue
        key = '{}-{}'.format(row[0], row[5])
        pairs = _non_empty_pairs(row[FIXED_COLUMNS:])
        if key in result:
            result[key].extend(pairs)
        else:
            # Drop empty pairs from the first row too, so later pairs are
            # appended directly after the last filled one.
            result[key] = row[:FIXED_COLUMNS] + pairs

print(result)